index.ts•2.95 kB
import { SupportedFileType } from '@superglue/shared';
import { CSVStrategy, parseCSV } from './parsers/csv.js';
import { DOCXStrategy, parseDOCX } from './parsers/docx.js';
import { ExcelStrategy, parseExcel } from './parsers/excel.js';
import { GZIPStrategy, parseGZIP, setGzipParseFileFunction } from './parsers/gzip.js';
import { JSONStrategy, parseJSON } from './parsers/json.js';
import { PDFStrategy, parsePDF } from './parsers/pdf.js';
import { XMLStrategy, parseXML } from './parsers/xml.js';
import { setZipParseFileFunction, ZIPStrategy, parseZIP } from './parsers/zip.js';
import { FileStrategyRegistry } from './strategy.js';
// Registry that parseFile consults when asked to auto-detect the file type.
const fileStrategyRegistry = new FileStrategyRegistry();

// Strategies are registered in exactly the order listed here; each trailing
// note records the priority tier associated with that strategy.
for (const strategy of [
  new GZIPStrategy(), // Priority 1: GZIP
  new ExcelStrategy(), // Priority 2: ZIP_BASED_SPECIFIC
  new DOCXStrategy(), // Priority 2: ZIP_BASED_SPECIFIC
  new PDFStrategy(), // Priority 10: BINARY_SIGNATURE
  new ZIPStrategy(), // Priority 11: ZIP_GENERIC
  new JSONStrategy(), // Priority 20: STRUCTURED_TEXT
  new XMLStrategy(), // Priority 20: STRUCTURED_TEXT
  new CSVStrategy(), // Priority 30: HEURISTIC_TEXT
]) {
  fileStrategyRegistry.register(strategy);
}
/**
 * Parses a file buffer with the parser that matches `fileType`.
 *
 * A missing or zero-length buffer short-circuits to `null` before any parser
 * is consulted. `RAW` decodes the buffer as UTF-8 text, and `AUTO` (the
 * default) hands the buffer to the strategy registry and returns the `data`
 * field of whatever it yields. Every other supported type is forwarded to its
 * dedicated parser.
 *
 * @param buffer - File contents to parse.
 * @param fileType - Format to parse as; defaults to auto-detection.
 * @returns The selected parser's result, or `null` for an empty buffer.
 * @throws Error when `fileType` is not one of the handled types.
 */
export async function parseFile(buffer: Buffer, fileType: SupportedFileType = SupportedFileType.AUTO): Promise<any> {
  const hasContent = Boolean(buffer) && buffer.length !== 0;
  if (!hasContent) {
    return null;
  }

  // Comparisons are kept in the same order as the type list they replace.
  if (fileType === SupportedFileType.JSON) {
    return parseJSON(buffer);
  }
  if (fileType === SupportedFileType.CSV) {
    return parseCSV(buffer);
  }
  if (fileType === SupportedFileType.XML) {
    return parseXML(buffer);
  }
  if (fileType === SupportedFileType.EXCEL) {
    return parseExcel(buffer);
  }
  if (fileType === SupportedFileType.PDF) {
    return parsePDF(buffer);
  }
  if (fileType === SupportedFileType.DOCX) {
    return parseDOCX(buffer);
  }
  if (fileType === SupportedFileType.GZIP) {
    return parseGZIP(buffer);
  }
  if (fileType === SupportedFileType.ZIP) {
    return parseZIP(buffer);
  }
  if (fileType === SupportedFileType.RAW) {
    // No parsing at all: the caller gets the bytes decoded as text.
    return buffer.toString('utf8');
  }
  if (fileType === SupportedFileType.AUTO) {
    // Let the registered strategies detect the format; only the parsed payload is returned.
    const detection = await fileStrategyRegistry.detectAndParse(buffer);
    return detection.data;
  }

  throw new Error(`Unsupported file type: ${fileType}`);
}
// The GZIP and ZIP parsers receive parseFile through their setter functions
// rather than importing it from this module, which avoids circular imports.
for (const injectParseFile of [setGzipParseFileFunction, setZipParseFileFunction]) {
  injectParseFile(parseFile);
}
// Re-export everything from the individual parser modules and from the
// strategy module, so their exports are also reachable through this file.
export * from './parsers/csv.js';
export * from './parsers/docx.js';
export * from './parsers/excel.js';
export * from './parsers/gzip.js';
export * from './parsers/json.js';
export * from './parsers/pdf.js';
export * from './parsers/xml.js';
export * from './parsers/zip.js';
export * from './strategy.js';