xml.ts•3.45 kB
import { SupportedFileType } from '@superglue/shared';
import sax from 'sax';
import { Readable } from 'stream';
import { DetectionPriority, FileParsingStrategy } from '../strategy.js';
/**
 * File-parsing strategy for XML content.
 *
 * Detection is a cheap heuristic over the start of the buffer; the actual
 * parsing is delegated to `parseXML`.
 */
export class XMLStrategy implements FileParsingStrategy {
  readonly fileType = SupportedFileType.XML;
  readonly priority = DetectionPriority.STRUCTURED_TEXT;

  /**
   * Decides whether the buffer looks like XML.
   *
   * Only the first 4096 bytes (or fewer, for short buffers) are decoded as
   * UTF-8 and trimmed. The sample must begin with `<?xml` or `<` and must
   * contain either a closing tag marker (`</`) or a self-closing marker (`/>`).
   *
   * @param buffer - Raw file contents to inspect.
   * @returns `true` when the sample passes both checks; `false` otherwise,
   *   including when inspecting the buffer throws.
   */
  canHandle(buffer: Buffer): boolean {
    try {
      const bytesToInspect = Math.min(buffer.length, 4096);
      const head = buffer.toString('utf8', 0, bytesToInspect).trim();

      const opensLikeMarkup = head.startsWith('<?xml') || head.startsWith('<');
      if (!opensLikeMarkup) {
        return false;
      }

      const hasClosingTag = head.includes('</');
      return hasClosingTag ? true : head.includes('/>');
    } catch {
      // Any failure while sampling is treated as "not XML".
      return false;
    }
  }

  /**
   * Parses the buffer by handing it to `parseXML`.
   *
   * @param buffer - Raw XML file contents.
   * @returns Whatever `parseXML` resolves to for this buffer.
   */
  async parse(buffer: Buffer): Promise<any> {
    return parseXML(buffer);
  }
}
/**
 * Parses an in-memory XML document into a plain nested object using sax.
 *
 * Shape of the result, as built by the handlers below:
 * - every element starts out as its attribute map, so attributes appear as
 *   keys directly on the element's object;
 * - text arriving while the element object has no keys replaces the element
 *   with the trimmed text string;
 * - text arriving while the element object already has keys (attributes or
 *   children) is stored under `_TEXT`, overwriting any previous `_TEXT`;
 * - a tag name that occurs more than once under the same parent is collected
 *   into an array;
 * - whitespace-only text is ignored.
 *
 * Known limitations kept for backward compatibility: once an element has
 * collapsed to a string, further text chunks for that element are ignored,
 * and no `cdata` listener is registered, so CDATA content is not captured.
 *
 * The sax stream is created in non-strict mode (`createStream(false)`).
 * NOTE(review): per the sax documentation, non-strict mode upper-cases tag
 * and attribute names unless the `lowercase` option is set - confirm that
 * callers expect upper-cased keys.
 *
 * Parser errors are logged as warnings and parsing continues. The buffer is
 * written to the parser directly rather than piped from a Readable, because
 * `pipe()` detaches the source when the destination emits `'error'`, which
 * would leave `end()` uncalled and this promise pending forever.
 *
 * @param buffer - Raw XML bytes.
 * @returns A promise resolving to an object keyed by the root tag name, or
 *   `null` when no element was encountered (for example, empty input).
 *   Rejects only if feeding the parser throws synchronously.
 */
export async function parseXML(buffer: Buffer): Promise<any> {
  // Fallback parent, used only if a close tag arrives while the stack is empty.
  const results: any = {};
  let currentElement: any = null;
  const elementStack: any[] = [];

  return new Promise((resolve, reject) => {
    const parser = sax.createStream(false);

    parser.on('opentag', (node) => {
      // Save the enclosing element so 'closetag' can attach the child to it.
      if (currentElement && typeof currentElement === 'object') {
        elementStack.push(currentElement);
      } else if (currentElement && typeof currentElement === 'string') {
        // Parent had already collapsed to text; keep that text under _TEXT.
        elementStack.push({ _TEXT: currentElement });
      } else {
        elementStack.push({});
      }
      currentElement = node.attributes || {};
    });

    parser.on('text', (text) => {
      const trimmed = text.trim();
      if (!currentElement || trimmed.length === 0) {
        return;
      }
      // Only plain element objects accept text; an element that has already
      // collapsed to a string keeps its first text chunk.
      if (typeof currentElement !== 'object' || Array.isArray(currentElement)) {
        return;
      }
      if (Object.keys(currentElement).length > 0) {
        currentElement['_TEXT'] = trimmed;
      } else {
        currentElement = trimmed;
      }
    });

    parser.on('closetag', (tagName) => {
      let parentElement = elementStack.pop();
      if (parentElement == null) {
        parentElement = results;
      }
      if (currentElement) {
        const existing = parentElement[tagName];
        if (!existing) {
          parentElement[tagName] = currentElement;
        } else if (Array.isArray(existing)) {
          existing.push(currentElement);
        } else {
          // Second occurrence of this tag name: promote the slot to an array.
          parentElement[tagName] = [existing, currentElement];
        }
      }
      currentElement = parentElement;
    });

    parser.on('error', (err) => {
      console.warn('XML parsing warning (continuing):', err.message);
    });

    parser.on('end', () => {
      resolve(currentElement);
    });

    try {
      // Feed the whole buffer in one write, then signal end of input so the
      // 'end' handler above settles the promise.
      parser.write(buffer);
      parser.end();
    } catch (err) {
      reject(err);
    }
  });
}