// index.spec.ts (6.08 kB)
import { describe, it, expect, afterEach } from 'vitest';
import * as cheerio from './index.js';
import { Writable } from 'node:stream';
import { createServer, type Server, type RequestListener } from 'node:http';
/** Callback placeholder that deliberately does nothing. */
function noop(): void {
  /* Intentionally empty */
}
/**
 * Creates a pending promise together with a Node-style callback that settles
 * it.
 *
 * Calling `cb` with a truthy `error` rejects the promise with that error;
 * otherwise the promise resolves with the second argument.
 *
 * @returns The pending `promise` and the `cb` that settles it.
 */
function getPromise() {
  type SettleCallback = (
    error: Error | null | undefined,
    $: cheerio.CheerioAPI,
  ) => void;

  // Assigned synchronously inside the Promise executor below.
  let settle!: SettleCallback;

  const pending = new Promise<cheerio.CheerioAPI>((fulfil, fail) => {
    settle = (error, $) => {
      if (error) {
        fail(error);
      } else {
        fulfil($);
      }
    };
  });

  return { promise: pending, cb: settle };
}
// Markup fragment shared by every test in this file.
const TEST_HTML = '<h1>Hello World</h1><a href="link">Example</a>';

// The same fragment encoded as UTF-16 little-endian, without a byte-order mark.
const TEST_HTML_UTF16 = Buffer.from(TEST_HTML, 'utf16le');

// The UTF-16 LE byte-order mark (0xFF 0xFE) followed by the encoded fragment.
const TEST_HTML_UTF16_BOM = Buffer.concat([
  Buffer.from([0xff, 0xfe]),
  TEST_HTML_UTF16,
]);
// Exercises `cheerio.loadBuffer` with UTF-8 input and BOM-prefixed UTF-16 input.
describe('loadBuffer', () => {
  // Both cases expect the fragment wrapped in a complete HTML document.
  const expectedDocument = `<html><head></head><body>${TEST_HTML}</body></html>`;

  it('should parse UTF-8 HTML', () => {
    const utf8Input = Buffer.from(TEST_HTML);
    const $ = cheerio.loadBuffer(utf8Input);

    expect($.html()).toBe(expectedDocument);
  });

  it('should parse UTF-16 HTML', () => {
    // This buffer starts with a UTF-16 LE byte-order mark.
    const $ = cheerio.loadBuffer(TEST_HTML_UTF16_BOM);

    expect($.html()).toBe(expectedDocument);
  });
});
// Exercises `cheerio.stringStream`, which is fed string chunks.
describe('stringStream', () => {
  it('should use parse5 by default', async () => {
    const pending = getPromise();
    const writable = cheerio.stringStream({}, pending.cb);
    expect(writable).toBeInstanceOf(Writable);

    writable.end(TEST_HTML);

    const $ = await pending.promise;
    const serialized = $.html();
    // The expected output is the fragment wrapped in html/head/body.
    expect(serialized).toBe(
      `<html><head></head><body>${TEST_HTML}</body></html>`,
    );
  });

  it('should error from parse5 on buffer', () => {
    const writable = cheerio.stringStream({}, noop);
    expect(writable).toBeInstanceOf(Writable);

    // Writing a Buffer instead of a string is expected to throw synchronously.
    const writeBuffer = () => writable.write(Buffer.from(TEST_HTML));
    expect(writeBuffer).toThrow('Parser can work only with string streams.');
  });

  it('should use htmlparser2 for XML', async () => {
    const pending = getPromise();
    const writable = cheerio.stringStream({ xmlMode: true }, pending.cb);
    expect(writable).toBeInstanceOf(Writable);

    writable.end(TEST_HTML);

    const $ = await pending.promise;
    // In XML mode the expected output is the input, with no wrapper elements.
    expect($.html()).toBe(TEST_HTML);
  });
});
// Exercises `cheerio.decodeStream`, which is fed raw bytes (here BOM-prefixed
// UTF-16 LE) rather than strings.
describe('decodeStream', () => {
  it('should use parse5 by default', async () => {
    const pending = getPromise();
    const writable = cheerio.decodeStream({}, pending.cb);
    expect(writable).toBeInstanceOf(Writable);

    writable.end(TEST_HTML_UTF16_BOM);

    const $ = await pending.promise;
    const serialized = $.html();
    expect(serialized).toBe(
      `<html><head></head><body>${TEST_HTML}</body></html>`,
    );

    // No base URL is supplied here; the href is expected back unchanged.
    const href = $('a').prop('href');
    expect(href).toBe('link');
  });

  it('should use htmlparser2 for XML', async () => {
    const pending = getPromise();
    const writable = cheerio.decodeStream({ xmlMode: true }, pending.cb);
    expect(writable).toBeInstanceOf(Writable);

    writable.end(TEST_HTML_UTF16_BOM);

    const $ = await pending.promise;
    // In XML mode the expected output is the decoded input, with no wrapper
    // elements.
    expect($.html()).toBe(TEST_HTML);
  });
});
// Exercises `cheerio.fromURL` against a throwaway local HTTP server.
describe('fromURL', () => {
  // Server started by the current test, if any; closed again in `afterEach`.
  let server: Server | undefined;

  /**
   * Starts an HTTP server on an OS-assigned port (`listen(0)`) and records it
   * in `server` so that `afterEach` can shut it down.
   *
   * @param contentType - `Content-Type` header sent by the default handler.
   * @param body - Response body sent by the default handler.
   * @param handler - Request listener; defaults to answering every request
   *   with status 200, `contentType` and `body`.
   * @returns A promise for the port the server is listening on. It rejects if
   *   the server emits `'error'` before it starts listening, or if no numeric
   *   port can be read from the server address.
   */
  function createTestServer(
    contentType: string,
    body: string | Buffer,
    handler: RequestListener = (_req, res) => {
      res.writeHead(200, { 'Content-Type': contentType });
      res.end(body);
    },
  ): Promise<number> {
    return new Promise((resolve, reject) => {
      const instance = createServer(handler);
      server = instance;

      // `listen()` reports failures through the 'error' event. Without a
      // listener the promise could never reject and the failure would surface
      // as an unhandled 'error' event instead of a failed test.
      instance.once('error', reject);

      instance.listen(0, () => {
        // Startup succeeded; later errors no longer belong to this promise.
        instance.off('error', reject);

        const address = instance.address();
        if (typeof address === 'string' || address == null) {
          reject(new Error('Failed to get port'));
        } else {
          resolve(address.port);
        }
      });
    });
  }

  // Close the server created by the previous test (if any) and wait for the
  // close callback before the next test starts.
  afterEach(
    async () =>
      new Promise<void>((resolve, reject) => {
        if (server) {
          server.close((err) => (err ? reject(err) : resolve()));
          server = undefined;
        } else {
          resolve();
        }
      }),
  );

  it('should fetch UTF-8 HTML', async () => {
    const port = await createTestServer('text/html', TEST_HTML);
    const $ = await cheerio.fromURL(`http://localhost:${port}`);

    expect($.html()).toBe(
      `<html><head></head><body>${TEST_HTML}</body></html>`,
    );
  });

  it('should fetch UTF-16 HTML', async () => {
    // The body carries no BOM; the charset is announced only in the header.
    const port = await createTestServer(
      'text/html; charset=utf-16le',
      TEST_HTML_UTF16,
    );
    const $ = await cheerio.fromURL(`http://localhost:${port}`);

    expect($.html()).toBe(
      `<html><head></head><body>${TEST_HTML}</body></html>`,
    );
  });

  it('should parse XML based on Content-Type', async () => {
    const port = await createTestServer('text/xml', TEST_HTML);
    const $ = await cheerio.fromURL(`http://localhost:${port}`);

    // For an XML content type the expected output has no html/head/body wrapper.
    expect($.html()).toBe(TEST_HTML);
  });

  it('should throw on non-HTML/XML Content-Type', async () => {
    const port = await createTestServer('text/plain', TEST_HTML);

    await expect(cheerio.fromURL(`http://localhost:${port}`)).rejects.toThrow(
      'The content-type "text/plain" is neither HTML nor XML.',
    );
  });

  it('should throw on non-2xx responses', async () => {
    const port = await createTestServer('text/html', TEST_HTML, (_, res) => {
      res.writeHead(500);
      res.end();
    });

    await expect(cheerio.fromURL(`http://localhost:${port}`)).rejects.toThrow(
      'Response Error',
    );
  });

  it('should follow redirects', async () => {
    let redirected = false;

    // The handler reads `port` only when a request arrives, which is after the
    // `await` below has assigned it.
    const port = await createTestServer('text/html', TEST_HTML, (req, res) => {
      if (redirected) {
        expect(req.url).toBe('/final/path');
        res.writeHead(200, { 'Content-Type': 'text/html' });
        res.end(TEST_HTML);
      } else {
        expect(req.url).toBe('/first');
        redirected = true;
        res.writeHead(302, { Location: `http://localhost:${port}/final/path` });
        res.end();
      }
    });

    const $ = await cheerio.fromURL(`http://localhost:${port}/first`);

    expect($.html()).toBe(
      `<html><head></head><body>${TEST_HTML}</body></html>`,
    );
    // The relative href "link" is expected to resolve against the URL reached
    // after the redirect, not the URL originally requested.
    expect($('a').prop('href')).toBe(`http://localhost:${port}/final/link`);
  });
});