import process from 'node:process';
import { FetchError, getErrorMessage } from '../errors.js';
import { transformHtmlToMarkdownInProcess } from '../transform.js';
/**
 * IPC sender bound to this process. The bound function is missing when the
 * process has no `send` method, which is treated as a fatal startup error.
 */
const send = process.send?.bind(process);
if (typeof send !== 'function') {
  throw new Error('transform-child started without IPC channel');
}

/** Abort controllers of transforms currently being handled, keyed by message id. */
const controllersById: Map<string, AbortController> = new Map();

/** Shared UTF-8 decoder, reused for buffers that need no other encoding. */
const decoder = new TextDecoder('utf-8');
/**
 * Reports a failed transform to the parent process as an `error` message.
 *
 * A `FetchError` is forwarded with its own name, message, URL, status code and
 * a shallow copy of its details. Any other thrown value is reduced to a name
 * (`'Error'` when the value is not an `Error` instance), the text returned by
 * `getErrorMessage`, and the URL of the request being handled.
 *
 * @param id - Id of the transform message that failed.
 * @param url - URL reported when the error is not a `FetchError`.
 * @param error - The value that was thrown.
 */
function postError(id: string, url: string, error: unknown): void {
  send?.({
    type: 'error',
    id,
    error:
      error instanceof FetchError
        ? {
            name: error.name,
            message: error.message,
            url: error.url,
            statusCode: error.statusCode,
            details: { ...error.details },
          }
        : {
            name: error instanceof Error ? error.name : 'Error',
            message: getErrorMessage(error),
            url,
          },
  });
}
/**
 * Type guard for the payload of a `transform` message.
 *
 * `id`, `url` and `includeMetadata` are required; `html`, `htmlBuffer`,
 * `encoding` and `skipNoiseRemoval` may be `undefined` but must have the
 * expected type when present. Only types are checked here, not contents
 * (empty strings pass).
 *
 * @param msg - Candidate message object.
 * @returns `true` when every field has an acceptable type.
 */
function isValidMessage(msg: Record<string, unknown>): msg is {
  id: string;
  url: string;
  html?: string;
  htmlBuffer?: Uint8Array;
  encoding?: string;
  includeMetadata: boolean;
  skipNoiseRemoval?: boolean;
} {
  const absentOrString = (value: unknown): boolean =>
    value === undefined || typeof value === 'string';
  const absentOrBoolean = (value: unknown): boolean =>
    value === undefined || typeof value === 'boolean';

  const {
    id,
    url,
    html,
    htmlBuffer,
    encoding,
    includeMetadata,
    skipNoiseRemoval,
  } = msg;

  return (
    typeof id === 'string' &&
    typeof url === 'string' &&
    typeof includeMetadata === 'boolean' &&
    absentOrString(html) &&
    (htmlBuffer === undefined || htmlBuffer instanceof Uint8Array) &&
    absentOrString(encoding) &&
    absentOrBoolean(skipNoiseRemoval)
  );
}
/**
 * Sends an `error` message named `ValidationError` to the parent process.
 *
 * @param id - Id of the message being rejected.
 * @param url - URL echoed back in the error payload.
 * @param message - Human-readable reason for the rejection.
 */
function postValidationError(id: string, url: string, message: string): void {
  const error = { name: 'ValidationError', message, url };
  send?.({ type: 'error', id, error });
}
/**
 * Produces the HTML text for a transform request.
 *
 * When a byte buffer is supplied it takes precedence over `html` and is
 * decoded: with the shared module-level decoder when `encoding` is empty,
 * missing or exactly `'utf-8'`, otherwise with a fresh `TextDecoder` created
 * for that encoding label. Without a buffer, `html` is returned as-is, or an
 * empty string when it is also missing.
 *
 * @param html - Already-decoded HTML, used only when no buffer is given.
 * @param htmlBuffer - Raw HTML bytes.
 * @param encoding - Encoding label for `htmlBuffer`.
 * @returns The HTML as a string.
 */
function decodeHtml(
  html: string | undefined,
  htmlBuffer: Uint8Array | undefined,
  encoding: string | undefined
): string {
  if (htmlBuffer) {
    const useSharedDecoder = !encoding || encoding === 'utf-8';
    const activeDecoder = useSharedDecoder
      ? decoder
      : new TextDecoder(encoding);
    return activeDecoder.decode(htmlBuffer);
  }
  return html ?? '';
}
/**
 * Handles a `transform` request and replies to the parent process with either
 * a `result` or an `error` message carrying the same `id`.
 *
 * Malformed requests are answered with a `ValidationError` whenever `id` is a
 * string, so the sender is not left without a reply; a message without a
 * string `id` cannot be matched to a request and is ignored.
 *
 * While the transform runs, an `AbortController` is registered in
 * `controllersById` under the request id and its signal is handed to the
 * transform; the entry is always removed afterwards.
 *
 * @param msg - Raw message object received over IPC.
 */
function handleTransform(msg: Record<string, unknown>): void {
  if (!isValidMessage(msg)) {
    // Blank ids/URLs are reported below, so report wrongly-typed or absent
    // fields as well instead of dropping the request silently.
    if (typeof msg.id === 'string') {
      postValidationError(
        msg.id,
        typeof msg.url === 'string' ? msg.url : '',
        'Invalid transform message'
      );
    }
    return;
  }

  const {
    id,
    url,
    html,
    htmlBuffer,
    encoding,
    includeMetadata,
    skipNoiseRemoval,
  } = msg;

  if (!id.trim()) {
    postValidationError(id, url, 'Missing transform message id');
    return;
  }
  if (!url.trim()) {
    postValidationError(id, url, 'Missing transform URL');
    return;
  }

  // NOTE(review): the result is used without awaiting, so the transform looks
  // synchronous; a `cancel` message could then only be dispatched after this
  // function returns — confirm whether cancellation is actually reachable.
  const controller = new AbortController();
  controllersById.set(id, controller);
  try {
    const content = decodeHtml(html, htmlBuffer, encoding);
    const result = transformHtmlToMarkdownInProcess(content, url, {
      includeMetadata,
      signal: controller.signal,
      // Forward the flag only when it is truthy; otherwise leave it unset.
      ...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
    });
    const { markdown, title, truncated } = result;
    send?.({
      type: 'result',
      id,
      // Omit `title` entirely rather than sending it as `undefined`.
      result:
        title === undefined
          ? { markdown, truncated }
          : { markdown, title, truncated },
    });
  } catch (error: unknown) {
    // Decoding and transform failures are both reported back to the parent.
    postError(id, url, error);
  } finally {
    controllersById.delete(id);
  }
}
/**
 * IPC entry point. Non-object messages and unknown `type` values are ignored.
 * A `cancel` message aborts the controller registered under its string `id`,
 * if one exists; a `transform` message is passed to `handleTransform`.
 */
process.on('message', (raw: unknown) => {
  if (raw === null || typeof raw !== 'object') return;

  const message = raw as Record<string, unknown>;
  switch (message.type) {
    case 'cancel': {
      const { id } = message;
      if (typeof id === 'string') {
        // No-op when nothing is registered under this id.
        controllersById.get(id)?.abort(new Error('Canceled'));
      }
      return;
    }
    case 'transform':
      handleTransform(message);
      return;
    default:
      return;
  }
});