Skip to main content
Glama

doc-ops-mcp

by Tele-AI
ooxmlParser.ts62.3 kB
/** * 自定义OOXML解析器 - 基于OOXML.md方案 * 完全不依赖外部独立软件,只使用Node.js包(JSZip、xml2js等) * 可以达到95-97%的格式还原率 */ const fs = require('fs').promises; const JSZip = require('jszip'); const xml2js = require('xml2js'); import path from 'path'; interface StyleDefinition { type: string; name: string; basedOn: string | null; css: Record<string, string>; } interface NumberingLevel { start: string; numFmt: string; text: string; indent: Record<string, string>; } interface NumberingDefinition { [level: string]: NumberingLevel; } interface RunNode { type: 'run' | 'break'; text: string; style: Record<string, string>; } interface ImageNode { type: 'image'; src: string; alt: string; style: Record<string, string>; } interface HyperlinkNode { type: 'hyperlink'; href: string; children: (RunNode | ImageNode)[]; } type ChildNode = RunNode | ImageNode | HyperlinkNode; interface ParagraphNode { type: 'paragraph'; style: Record<string, string>; children: ChildNode[]; numbering: { numId: string; level: string } | null; } interface TableCellNode { type: 'tableCell'; style: Record<string, string>; children: ParagraphNode[]; colspan: number; rowspan: number; } interface TableRowNode { type: 'tableRow'; style: Record<string, string>; cells: TableCellNode[]; } interface TableNode { type: 'table'; style: Record<string, string>; rows: TableRowNode[]; } class OOXMLParser { private parser: any; private styles: Record<string, StyleDefinition>; private numbering: Record<string, NumberingDefinition>; private relationships: Record<string, string>; private themes: Record<string, any>; private settings: Record<string, any>; private media: Record<string, any>; private styleCache: Map<string, any>; private numberingCache: Map<string, string>; private outputDir: string; private cacheDir: string; private imageCache: Map<string, string>; // 图片缓存映射 // 移除了listCounters,现在使用HTML原生列表功能 private tempImageId = 0; constructor() { this.parser = new xml2js.Parser({ explicitArray: false, mergeAttrs: true, normalize: true, normalizeTags: true }); // 存储解析后的数据 this.styles = {}; this.numbering = {}; this.relationships = {}; this.themes = {}; this.settings = {}; this.media = {}; // 样式映射缓存 this.styleCache = new Map(); this.numberingCache = new Map(); this.imageCache = new Map(); // 移除了listCounters初始化 // 设置缓存目录和输出目录 this.cacheDir = process.env.CACHE_DIR || process.env.OUTPUT_DIR || ''; this.outputDir = process.env.OUTPUT_DIR || ''; } /** * 主解析入口 */ async parseDocx(docxPath: string, outputDir?: string): Promise<{ html: string; css: string; media: Record<string, any> }> { try { console.log('🚀 开始自定义OOXML解析器转换...'); // 设置输出目录和缓存目录 if (outputDir) { this.outputDir = outputDir; } else if (!this.outputDir) { // 如果没有设置输出目录,使用临时目录 this.outputDir = path.dirname(docxPath); } // 设置缓存目录,优先使用环境变量CACHE_DIR if (!this.cacheDir) { this.cacheDir = process.env.CACHE_DIR || this.outputDir; } console.log('📂 缓存目录:', this.cacheDir); console.log('📂 输出目录:', this.outputDir); // 1. 读取并解压DOCX文件 const data = await fs.readFile(docxPath); const zip = await JSZip.loadAsync(data); // 2. 并行加载所有必要的XML文件 await Promise.all([ this.loadStyles(zip), this.loadNumbering(zip), this.loadRelationships(zip), this.loadThemes(zip), this.loadSettings(zip), this.loadMedia(zip) ]); // 3. 解析主文档 const documentXml = await this.getFileContent(zip, 'word/document.xml'); if (!documentXml) { throw new Error('无法找到主文档 word/document.xml'); } const document = await this.parseXml(documentXml); // 4. 构建DOM树 const dom = await this.buildDocumentDOM(document); // 5. 生成HTML const html = this.renderToHTML(dom); // 6. 复制图片到输出目录 await this.copyImagesToOutput(); console.log('✅ 自定义OOXML解析器转换完成'); console.log(`📊 处理了 ${this.imageCache.size} 张图片`); return { html, css: this.generateCSS(), media: this.media }; } catch (error: any) { console.error('❌ OOXML解析器转换失败:', error); throw error; } } async saveImageToCache(filename: string, base64: string, relationId: string): Promise<string> { // 检查是否已经缓存过这个图片 if (this.imageCache.has(relationId)) { const cachedPath = this.imageCache.get(relationId)!; console.log(`📸 使用缓存图片: ${relationId} -> ${cachedPath}`); return cachedPath; } // 创建缓存目录下的 images 子目录 const imagesDir = path.join(this.cacheDir, 'images'); await fs.mkdir(imagesDir, { recursive: true }); // 保证唯一文件名 const ext = filename.split('.').pop() || 'png'; const basename = path.basename(filename, '.' + ext); const uniq = `${basename}_${Date.now()}_${this.tempImageId++}.${ext}`; const filepath = path.join(imagesDir, uniq); // 写入图片到缓存目录 const buffer = Buffer.from(base64, 'base64'); await fs.writeFile(filepath, buffer); // 生成绝对路径(file:// 协议) const relativePath = `file://${filepath}`; // 缓存映射关系 this.imageCache.set(relationId, relativePath); console.log(`💾 图片已缓存: ${relationId} -> ${filepath}`); console.log(`🔗 相对路径: ${relativePath}`); return relativePath; } /** * 复制缓存图片到输出目录 */ async copyImagesToOutput(): Promise<void> { // 由于图片路径已经是从输出目录到缓存目录的相对路径,不需要复制 console.log('📸 图片使用相对路径,无需复制'); return; } // 保留原有的复制逻辑作为备用 async copyImagesToOutputOld(): Promise<void> { if (this.cacheDir === this.outputDir || this.imageCache.size === 0) { console.log('📸 图片已在正确位置或无图片需要复制'); return; } console.log(`📸 开始复制 ${this.imageCache.size} 张图片到输出目录...`); // 创建输出目录下的images文件夹 const outputImagesDir = path.join(this.outputDir, 'images'); await fs.mkdir(outputImagesDir, { recursive: true }); for (const [relationId, relativePath] of this.imageCache) { try { const sourcePath = path.join(this.cacheDir, relativePath); const targetPath = path.join(this.outputDir, relativePath); // 检查源文件是否存在 try { await fs.access(sourcePath); } catch { console.log(`⚠️ 源图片不存在: ${sourcePath}`); continue; } // 复制文件 await fs.copyFile(sourcePath, targetPath); console.log(`✅ 图片已复制: ${relativePath}`); } catch (error) { console.error(`❌ 复制图片失败 ${relativePath}:`, error); } } console.log('📸 图片复制完成'); } /** * 加载样式定义 */ async loadStyles(zip: any): Promise<void> { try { const stylesXml = await this.getFileContent(zip, 'word/styles.xml'); if (!stylesXml) { console.log('⚠️ 未找到样式文件,使用默认样式'); return; } const result = await this.parseXml(stylesXml); if (result && result['w:styles']) { const styles = result['w:styles']; // 解析文档默认样式 if (styles['w:docdefaults']) { this.styles.defaults = this.parseDocDefaults(styles['w:docdefaults']); } // 解析所有样式定义 const styleList = Array.isArray(styles['w:style']) ? styles['w:style'] : [styles['w:style']].filter(Boolean); styleList.forEach(style => { if (style && style['w:styleid']) { const styleId = style['w:styleid']; this.styles[styleId] = this.parseStyleDefinition(style); } }); console.log(`📊 解析了 ${Object.keys(this.styles).length} 个样式`); } } catch (e: any) { console.log('⚠️ 样式文件解析失败,使用默认样式:', e.message); } } /** * 解析文档默认样式 */ parseDocDefaults(docDefaults: any): StyleDefinition { const def: StyleDefinition = { type: 'default', name: 'Document Defaults', basedOn: null, css: {} }; // 解析默认段落属性 if (docDefaults['w:pprdefault'] && docDefaults['w:pprdefault']['w:ppr']) { Object.assign(def.css, this.parseParagraphProperties(docDefaults['w:pprdefault']['w:ppr'])); } // 解析默认字符属性 if (docDefaults['w:rprdefault'] && docDefaults['w:rprdefault']['w:rpr']) { Object.assign(def.css, this.parseRunProperties(docDefaults['w:rprdefault']['w:rpr'])); } return def; } /** * 解析样式定义 */ parseStyleDefinition(style: any): StyleDefinition { const def: StyleDefinition = { type: style['w:type'] || 'paragraph', name: style['w:name'] ? style['w:name']['w:val'] : '', basedOn: style['w:basedon'] ? style['w:basedon']['w:val'] : null, css: {} }; // 解析段落属性 if (style['w:ppr']) { Object.assign(def.css, this.parseParagraphProperties(style['w:ppr'])); } // 解析字符属性 if (style['w:rpr']) { Object.assign(def.css, this.parseRunProperties(style['w:rpr'])); } return def; } /** * 解析段落属性 */ parseParagraphProperties(pPr: any): Record<string, string> { const css: Record<string, string> = {}; // 对齐方式 if (pPr['w:jc']) { const align = pPr['w:jc']['w:val']; css['text-align'] = this.mapAlignment(align); } // 缩进 if (pPr['w:ind']) { const ind = pPr['w:ind']; if (ind['w:left']) { css['margin-left'] = this.twipToPixel(ind['w:left']) + 'px'; } if (ind['w:right']) { css['margin-right'] = this.twipToPixel(ind['w:right']) + 'px'; } if (ind['w:firstline']) { css['text-indent'] = this.twipToPixel(ind['w:firstline']) + 'px'; } } // 行距 - 优化行距计算 if (pPr['w:spacing']) { const spacing = pPr['w:spacing']; if (spacing['w:line']) { const lineValue = parseInt(spacing['w:line']); // 改善行距计算,使其更接近Word的显示效果 if (spacing['w:lineRule'] === 'exact') { css['line-height'] = this.twipToPixel(lineValue) + 'px'; } else { // 自动行距或最小行距 css['line-height'] = Math.max(1.0, lineValue / 240) + ''; } } if (spacing['w:before']) { css['margin-top'] = Math.max(0, this.twipToPixel(spacing['w:before'])) + 'px'; } if (spacing['w:after']) { css['margin-bottom'] = Math.max(0, this.twipToPixel(spacing['w:after'])) + 'px'; } } // 边框 if (pPr['w:pbdr']) { css.border = this.parseBorders(pPr['w:pbdr']); } // 背景色 if (pPr['w:shd']) { const shd = pPr['w:shd']; const fill = shd['w:fill'] || shd.fill; if (fill && fill !== 'auto' && fill !== 'auto') { // 确保颜色值是有效的十六进制 const cleanColor = fill.replace('#', ''); if (/^[0-9A-Fa-f]{6}$/.test(cleanColor)) { css['background-color'] = '#' + cleanColor; } } } // 处理运行属性中的字体大小 if (pPr['w:rPr']) { const runProps = this.parseRunProperties(pPr['w:rPr']); Object.assign(css, runProps); } return css; } /** * 解析字符属性 */ parseRunProperties(rPr: any): Record<string, string> { const css: Record<string, string> = {}; // 字体 if (rPr['w:rfonts']) { const fonts = rPr['w:rfonts']; const fontList: string[] = []; // 优先使用ASCII字体,然后是东亚字体 if (fonts['w:ascii']) fontList.push(`"${fonts['w:ascii']}"`); if (fonts['w:eastAsia'] && fonts['w:eastAsia'] !== fonts['w:ascii']) { fontList.push(`"${fonts['w:eastAsia']}"`); } if (fonts['w:hAnsi'] && fonts['w:hAnsi'] !== fonts['w:ascii']) { fontList.push(`"${fonts['w:hAnsi']}"`); } // 添加默认字体 fontList.push('"Microsoft YaHei"', '"SimSun"', 'sans-serif'); if (fontList.length > 0) { css['font-family'] = fontList.join(', '); } } // 字号 - 修复字号解析 if (rPr['w:sz']) { const size = rPr['w:sz']['w:val'] || rPr['w:sz']; if (size) { const sizeNum = parseInt(size); if (!isNaN(sizeNum) && sizeNum > 0) { css['font-size'] = (sizeNum / 2) + 'pt'; } } } // 东亚字体字号 if (rPr['w:szCs']) { const size = rPr['w:szCs']['w:val'] || rPr['w:szCs']; if (size && !css['font-size']) { const sizeNum = parseInt(size); if (!isNaN(sizeNum) && sizeNum > 0) { css['font-size'] = (sizeNum / 2) + 'pt'; } } } // 加粗 - 修复逻辑 if (rPr['w:b']) { const boldVal = rPr['w:b']['w:val']; // 如果没有val属性,或者val为true/1,则为粗体 if (boldVal === undefined || boldVal === 'true' || boldVal === '1' || boldVal === true) { css['font-weight'] = 'bold'; } } // 东亚字体加粗 if (rPr['w:bCs']) { const boldVal = rPr['w:bCs']['w:val']; if (boldVal === undefined || boldVal === 'true' || boldVal === '1' || boldVal === true) { css['font-weight'] = 'bold'; } } // 斜体 if (rPr['w:i']) { const v = rPr['w:i']['w:val']; if (typeof v === 'undefined' || v === 'true' || v === '1') { css['font-style'] = 'italic'; } } // 下划线 if (rPr['w:u']) { const uType = rPr['w:u']['w:val'] || 'single'; if (uType !== 'none') { css['text-decoration'] = 'underline'; if (uType === 'double') { css['text-decoration-style'] = 'double'; } } } // 删除线 - 修复逻辑,只有明确设置为true或存在且没有val属性时才添加删除线 if (rPr['w:strike']) { const strikeVal = rPr['w:strike']['w:val']; if (strikeVal === undefined || strikeVal === 'true' || strikeVal === '1') { if (css['text-decoration']) { css['text-decoration'] += ' line-through'; } else { css['text-decoration'] = 'line-through'; } } } // 颜色 if (rPr['w:color']) { const color = rPr['w:color']['w:val'] || rPr['w:color']; if (color && color !== 'auto' && color !== 'auto') { // 确保颜色值是有效的十六进制 const cleanColor = color.replace('#', ''); if (/^[0-9A-Fa-f]{6}$/.test(cleanColor)) { css['color'] = '#' + cleanColor; } } } // 高亮 if (rPr['w:highlight']) { const highlight = rPr['w:highlight']['w:val'] || rPr['w:highlight']; if (highlight) { css['background-color'] = this.mapHighlightColor(highlight); } } // 背景色(shading) if (rPr['w:shd']) { const shd = rPr['w:shd']; const fill = shd['w:fill'] || shd.fill; if (fill && fill !== 'auto') { const cleanColor = fill.replace('#', ''); if (/^[0-9A-Fa-f]{6}$/.test(cleanColor)) { css['background-color'] = '#' + cleanColor; } } } // 上下标 if (rPr['w:vertAlign']) { const align = rPr['w:vertAlign']['w:val']; if (align === 'superscript') { css['vertical-align'] = 'super'; css['font-size'] = '0.8em'; } else if (align === 'subscript') { css['vertical-align'] = 'sub'; css['font-size'] = '0.8em'; } } return css; } /** * 解析边框 */ parseBorders(borders: any): string { const borderStyles: string[] = []; ['top', 'bottom', 'left', 'right'].forEach(side => { const key = `w:${side}`; if (borders[key]) { const border = borders[key]; const width = this.eighthPointToPixel(border['w:sz']) || 1; const color = border['w:color'] || '000000'; const style = this.mapBorderStyle(border['w:val']) || 'solid'; borderStyles.push(`border-${side}: ${width}px ${style} #${color}`); } }); return borderStyles.join('; '); } /** * 加载编号/列表定义 */ async loadNumbering(zip: any): Promise<void> { try { const numberingXml = await this.getFileContent(zip, 'word/numbering.xml'); if (!numberingXml) { console.log('⚠️ 未找到编号文件'); return; } const result = await this.parseXml(numberingXml); // 尝试不同的属性名格式 let numbering = null; if (result && result['w:numbering']) { numbering = result['w:numbering']; } else if (result && result['numbering']) { numbering = result['numbering']; } if (numbering) { // 解析抽象编号定义 let abstractNums: any = numbering['w:abstractnum'] || numbering['abstractnum']; if (abstractNums) { abstractNums = Array.isArray(abstractNums) ? abstractNums : [abstractNums].filter(Boolean); abstractNums.forEach(abstractNum => { const id = abstractNum['w:abstractNumId'] || abstractNum['w:abstractnumid'] || abstractNum['abstractnumid'] || abstractNum['abstractNumId']; if (id) { this.numbering[id] = this.parseAbstractNumbering(abstractNum); } }); } // 解析编号实例 let nums: any = numbering['w:num'] || numbering['num']; if (nums) { nums = Array.isArray(nums) ? nums : [nums].filter(Boolean); nums.forEach(num => { const numId = num['w:numId'] || num['w:numid'] || num['numid'] || num['numId']; const abstractNumId = num['w:abstractNumId'] || num['w:abstractnumid'] || num['abstractnumid'] || num['abstractNumId']; const abstractId = abstractNumId ? (abstractNumId['w:val'] || abstractNumId['val'] || abstractNumId) : null; if (numId && abstractId) { this.numberingCache.set(numId, abstractId); } }); } console.log(`📊 解析了 ${Object.keys(this.numbering).length} 个编号定义`); } } catch (e: any) { console.log('⚠️ 编号文件解析失败:', e.message); } } /** * 解析抽象编号定义 */ parseAbstractNumbering(abstractNum: any): NumberingDefinition { const levels: NumberingDefinition = {}; if (abstractNum['w:lvl']) { const lvls = Array.isArray(abstractNum['w:lvl']) ? abstractNum['w:lvl'] : [abstractNum['w:lvl']].filter(Boolean); lvls.forEach(lvl => { const level = lvl['w:ilvl'] || '0'; levels[level] = { start: lvl['w:start'] ? lvl['w:start']['w:val'] : '1', numFmt: lvl['w:numfmt'] ? lvl['w:numfmt']['w:val'] : 'decimal', text: lvl['w:lvltext'] ? lvl['w:lvltext']['w:val'] : '', indent: lvl['w:ppr'] ? this.parseParagraphProperties(lvl['w:ppr']) : {} }; }); } return levels; } /** * 加载关系定义 */ async loadRelationships(zip: any): Promise<void> { try { const relsXml = await this.getFileContent(zip, 'word/_rels/document.xml.rels'); if (!relsXml) { console.log('⚠️ 未找到关系文件'); return; } const result = await this.parseXml(relsXml); // 尝试不同的属性名格式(大小写) let relationshipArray = null; if (result && result['Relationships'] && result['Relationships']['Relationship']) { relationshipArray = result['Relationships']['Relationship']; } else if (result && result['relationships'] && result['relationships']['relationship']) { relationshipArray = result['relationships']['relationship']; } if (relationshipArray) { const relationships = Array.isArray(relationshipArray) ? relationshipArray : [relationshipArray].filter(Boolean); relationships.forEach(rel => { if (rel['Id'] && rel['Target']) { this.relationships[rel['Id']] = rel['Target']; } }); console.log(`📊 解析了 ${Object.keys(this.relationships).length} 个关系`); } } catch (e: any) { console.log('⚠️ 关系文件解析失败:', e.message); } } /** * 加载主题定义 */ async loadThemes(zip: any): Promise<void> { try { const themeXml = await this.getFileContent(zip, 'word/theme/theme1.xml'); if (!themeXml) { console.log('⚠️ 未找到主题文件'); return; } const result = await this.parseXml(themeXml); this.themes = result || {}; console.log('📊 主题文件解析完成'); } catch (e: any) { console.log('⚠️ 主题文件解析失败:', e.message); } } /** * 加载设置定义 */ async loadSettings(zip: any): Promise<void> { try { const settingsXml = await this.getFileContent(zip, 'word/settings.xml'); if (!settingsXml) { console.log('⚠️ 未找到设置文件'); return; } const result = await this.parseXml(settingsXml); this.settings = result || {}; console.log('📊 设置文件解析完成'); } catch (e: any) { console.log('⚠️ 设置文件解析失败:', e.message); } } /** * 加载媒体文件 */ async loadMedia(zip: any): Promise<void> { try { const mediaFiles = Object.keys(zip.files).filter(filename => filename.startsWith('word/media/') ); for (const filename of mediaFiles) { const file = zip.files[filename]; if (!file.dir) { const data = await file.async('base64'); this.media[filename] = data; } } console.log(`📊 解析了 ${Object.keys(this.media).length} 个媒体文件`); } catch (e: any) { console.log('⚠️ 媒体文件解析失败:', e.message); } } /** * 构建文档DOM */ async buildDocumentDOM(document: any): Promise<any[]> { const body = document['w:document']['w:body']; const dom: any[] = []; // 如果body是数组,按顺序处理所有元素 if (Array.isArray(body)) { for (const element of body) { await this.processBodyElement(element, dom); } } else { // 如果body是对象,需要按照XML中的顺序处理元素 // 先收集所有元素并按照它们在XML中的顺序排序 const elements: Array<{type: string, data: any, order: number}> = []; let order = 0; // 处理段落 if (body['w:p']) { const paragraphs = Array.isArray(body['w:p']) ? body['w:p'] : [body['w:p']].filter(Boolean); for (const p of paragraphs) { elements.push({type: 'paragraph', data: p, order: order++}); } } // 处理表格 if (body['w:tbl']) { const tables = Array.isArray(body['w:tbl']) ? body['w:tbl'] : [body['w:tbl']].filter(Boolean); for (const tbl of tables) { elements.push({type: 'table', data: tbl, order: order++}); } } // 按顺序处理元素 elements.sort((a, b) => a.order - b.order); for (const element of elements) { if (element.type === 'paragraph') { const paragraph = await this.buildParagraph(element.data); if (paragraph) dom.push(paragraph); } else if (element.type === 'table') { const table = await this.buildTable(element.data); if (table) dom.push(table); } } } return dom; } /** * 处理body中的单个元素 */ async processBodyElement(element: any, dom: any[]): Promise<void> { if (element['w:p']) { const paragraph = await this.buildParagraph(element['w:p']); if (paragraph) dom.push(paragraph); } else if (element['w:tbl']) { const table = await this.buildTable(element['w:tbl']); if (table) dom.push(table); } } /** * 构建段落DOM */ async buildParagraph(p: any): Promise<ParagraphNode | null> { if (!p) return null; const paragraph: ParagraphNode = { type: 'paragraph', style: {}, children: [], numbering: null }; // 解析段落属性 if (p['w:ppr']) { paragraph.style = this.parseParagraphProperties(p['w:ppr']); // 检查是否是列表项 if (p['w:ppr']['w:numpr']) { paragraph.numbering = this.parseNumberingProperties(p['w:ppr']['w:numpr']); } // 检查段落样式引用 if (p['w:ppr']['w:pstyle']) { const styleId = p['w:ppr']['w:pstyle']['w:val']; if (this.styles[styleId]) { paragraph.style = { ...this.styles[styleId].css, ...paragraph.style }; } } } // 解析文本内容 if (p['w:r']) { const runs = Array.isArray(p['w:r']) ? p['w:r'] : [p['w:r']].filter(Boolean); for (const r of runs) { const run = await this.buildRun(r); if (run) paragraph.children.push(run); } } // 处理特殊元素(如超链接) if (p['w:hyperlink']) { const hyperlinks = Array.isArray(p['w:hyperlink']) ? p['w:hyperlink'] : [p['w:hyperlink']].filter(Boolean); for (const link of hyperlinks) { const hyperlinkNode = await this.buildHyperlink(link); if (hyperlinkNode) paragraph.children.push(hyperlinkNode); } } return paragraph; } /** * 解析编号属性 */ parseNumberingProperties(numPr: any): { numId: string; level: string } | null { if (!numPr) return null; const numId = numPr['w:numid'] ? numPr['w:numid']['w:val'] : null; const level = numPr['w:ilvl'] ? numPr['w:ilvl']['w:val'] : '0'; if (numId) { return { numId, level }; } return null; } /** * 构建文本运行DOM */ async buildRun(r: any): Promise<RunNode | ImageNode | null> { if (!r) return null; const run: RunNode = { type: 'run', text: '', style: {} }; // 解析运行属性 if (r['w:rpr']) { run.style = this.parseRunProperties(r['w:rpr']); // 检查样式引用 if (r['w:rpr']['w:rstyle']) { const styleId = r['w:rpr']['w:rstyle']['w:val']; if (this.styles[styleId]) { run.style = { ...this.styles[styleId].css, ...run.style }; } } } // 提取文本 if (r['w:t']) { if (typeof r['w:t'] === 'string') { run.text = r['w:t']; } else if (r['w:t']['_']) { run.text = r['w:t']['_']; } else if (r['w:t']['#text']) { run.text = r['w:t']['#text']; } } // 处理制表符 if (r['w:tab']) { run.text += '\t'; } // 处理换行 if (r['w:br']) { run.type = 'break'; } // 处理图片 if (r['w:drawing'] || r['w:pict']) { return await this.buildImage(r); } return run; } /** * 构建图片DOM */ async buildImage(r: any): Promise<ImageNode | null> { const image: ImageNode = { type: 'image', src: '', alt: '', style: {} }; console.log(`🖼️ 开始处理图片节点:`, JSON.stringify(r, null, 2)); // 处理 w:pict/w:imageData if (r['w:pict'] && r['w:pict']['v:imagedata']) { const imgData = r['w:pict']['v:imagedata']; const relId = imgData['r:id']; console.log(`📸 处理 w:pict 图片,关系ID: ${relId}`); image.src = await this.getImageData(relId); image.alt = imgData['o:title'] || imgData['alt'] || ''; } // 从drawing中提取图片 if (r['w:drawing']) { const drawing = r['w:drawing']; const inline = drawing['wp:inline'] || drawing['wp:anchor']; console.log(`🎨 处理 w:drawing 图片`); if (inline) { // 获取尺寸 const extent = inline['wp:extent']; if (extent) { const width = this.emuToPixel(extent['cx']); const height = this.emuToPixel(extent['cy']); image.style['width'] = width + 'px'; image.style['height'] = height + 'px'; console.log(`📏 图片尺寸: ${width}x${height}px`); } // 获取图片数据 const blip = this.findBlip(inline); if (blip && blip['r:embed']) { const relationId = blip['r:embed']; console.log(`🔗 找到图片关系ID: ${relationId}`); image.src = await this.getImageData(relationId); } else { console.log(`⚠️ 未找到图片blip或关系ID`); } } } console.log(`🖼️ 图片处理结果:`, { src: image.src, alt: image.alt, style: image.style }); // 如果没有找到图片源,返回null if (!image.src) { console.log(`❌ 图片源为空,跳过此图片`); return null; } return image; } /** * 查找图片blip元素 */ findBlip(element: any): any { if (!element) return null; // 递归查找blip元素 if (element['a:blip']) { return element['a:blip']; } for (const key in element) { if (typeof element[key] === 'object') { const result = this.findBlip(element[key]); if (result) return result; } } return null; } /** * 获取图片数据 - 使用缓存机制 */ async getImageData(relationId: string): Promise<string> { const target = this.relationships[relationId]; if (!target) { console.log(`⚠️ 未找到关系ID: ${relationId}`); return ''; } console.log(`🖼️ 处理图片关系: ${relationId} -> ${target}`); // 多路径尝试 const possiblePaths = [ `word/${target}`, target, `word/media/${target.split('/').pop()}`, `word/media/${target}` ]; console.log(`🔍 尝试路径:`, possiblePaths); console.log(`📁 可用媒体文件:`, Object.keys(this.media)); for (const mediaPath of possiblePaths) { const mediaData = this.media[mediaPath]; if (mediaData) { console.log(`✅ 找到图片数据: ${mediaPath}`); try { // 获取图片文件名 const filename = path.basename(target); // 保存图片到缓存并返回相对路径 const storedPath = await this.saveImageToCache(filename, mediaData, relationId); return storedPath; } catch (error) { console.error(`❌ 保存图片失败:`, error); return ''; } } } console.log(`❌ 未找到图片数据: ${target}`); return ''; } /** * 获取图片MIME类型 */ getImageMimeType(filename: string): string { const ext = filename.split('.').pop()?.toLowerCase(); const mimeTypes: Record<string, string> = { 'png': 'image/png', 'jpg': 'image/jpeg', 'jpeg': 'image/jpeg', 'gif': 'image/gif', 'bmp': 'image/bmp', 'svg': 'image/svg+xml' }; return mimeTypes[ext || ''] || 'image/png'; } /** * 构建超链接DOM */ async buildHyperlink(link: any): Promise<HyperlinkNode> { const hyperlink: HyperlinkNode = { type: 'hyperlink', href: '', children: [] }; // 获取链接地址 if (link['r:id']) { const relationId = link['r:id']; hyperlink.href = this.relationships[relationId] || ''; } // 处理链接内容 if (link['w:r']) { const runs = Array.isArray(link['w:r']) ? link['w:r'] : [link['w:r']].filter(Boolean); for (const r of runs) { const run = await this.buildRun(r); if (run) hyperlink.children.push(run); } } return hyperlink; } /** * 构建表格DOM */ async buildTable(tbl: any): Promise<TableNode | null> { if (!tbl) return null; const table: TableNode = { type: 'table', style: {}, rows: [] }; // 解析表格属性 if (tbl['w:tblpr']) { table.style = this.parseTableProperties(tbl['w:tblpr']); } // 解析表格行 if (tbl['w:tr']) { const rows = Array.isArray(tbl['w:tr']) ? tbl['w:tr'] : [tbl['w:tr']].filter(Boolean); for (const tr of rows) { const row = await this.buildTableRow(tr); if (row) table.rows.push(row); } } return table; } /** * 解析表格属性 */ parseTableProperties(tblPr: any): Record<string, string> { const css: Record<string, string> = { 'border-collapse': 'collapse', 'width': '100%' }; // 表格宽度 if (tblPr['w:tblw']) { const width = tblPr['w:tblw']; if (width['w:type'] === 'pct') { css['width'] = (parseInt(width['w:w']) / 50) + '%'; } else if (width['w:type'] === 'dxa') { css['width'] = this.twipToPixel(width['w:w']) + 'px'; } } // 表格边框 if (tblPr['w:tblborders']) { const borders = tblPr['w:tblborders']; css['border'] = this.parseTableBorders(borders); } // 表格对齐 if (tblPr['w:jc']) { const align = tblPr['w:jc']['w:val']; if (align === 'center') { css['margin'] = '0 auto'; } else if (align === 'right') { css['margin-left'] = 'auto'; } } return css; } /** * 解析表格边框 */ parseTableBorders(borders: any): string { const borderStyles: string[] = []; ['top', 'bottom', 'left', 'right'].forEach(side => { const key = `w:${side}`; if (borders[key]) { const border = borders[key]; const width = this.eighthPointToPixel(border['w:sz']) || 1; const color = border['w:color'] || '000000'; const style = this.mapBorderStyle(border['w:val']) || 'solid'; borderStyles.push(`border-${side}: ${width}px ${style} #${color}`); } }); return borderStyles.join('; '); } /** * 构建表格行DOM */ async buildTableRow(tr: any): Promise<TableRowNode | null> { if (!tr) return null; const row: TableRowNode = { type: 'tableRow', style: {}, cells: [] }; // 解析行属性 if (tr['w:trpr']) { row.style = this.parseTableRowProperties(tr['w:trpr']); } // 解析单元格 if (tr['w:tc']) { const cells = Array.isArray(tr['w:tc']) ? tr['w:tc'] : [tr['w:tc']].filter(Boolean); for (const tc of cells) { const cell = await this.buildTableCell(tc); if (cell) row.cells.push(cell); } } return row; } /** * 解析表格行属性 */ parseTableRowProperties(trPr: any): Record<string, string> { const css: Record<string, string> = {}; // 行高 if (trPr['w:trheight']) { const height = trPr['w:trheight']; if (height['w:val']) { css['height'] = this.twipToPixel(height['w:val']) + 'px'; } } return css; } /** * 构建表格单元格DOM */ async buildTableCell(tc: any): Promise<TableCellNode | null> { if (!tc) return null; const cell: TableCellNode = { type: 'tableCell', style: {}, children: [], colspan: 1, rowspan: 1 }; // 解析单元格属性 if (tc['w:tcpr']) { const tcPr = tc['w:tcpr']; // 合并单元格 if (tcPr['w:gridspan']) { cell.colspan = parseInt(tcPr['w:gridspan']['w:val']); } if (tcPr['w:vmerge']) { if (!tcPr['w:vmerge']['w:val']) { cell.rowspan = 1; // 标记为需要计算 } else { cell.rowspan = 0; // 被合并的单元格 } } // 单元格宽度 if (tcPr['w:tcw']) { const width = tcPr['w:tcw']; if (width['w:type'] === 'pct') { cell.style['width'] = (parseInt(width['w:w']) / 50) + '%'; } else if (width['w:type'] === 'dxa') { cell.style['width'] = this.twipToPixel(width['w:w']) + 'px'; } } // 单元格边框 if (tcPr['w:tcborders']) { cell.style['border'] = this.parseTableBorders(tcPr['w:tcborders']); } // 背景色 if (tcPr['w:shd']) { const fill = tcPr['w:shd']['w:fill']; if (fill && fill !== 'auto') { cell.style['background-color'] = '#' + fill; } } } // 解析单元格内容 if (tc['w:p']) { const paragraphs = Array.isArray(tc['w:p']) ? tc['w:p'] : [tc['w:p']].filter(Boolean); for (const p of paragraphs) { const paragraph = await this.buildParagraph(p); if (paragraph) cell.children.push(paragraph); } } return cell; } /** * 渲染DOM为HTML */ renderToHTML(dom: any[]): string { const htmlParts: string[] = []; const listStack: string[] = []; // 用于跟踪列表嵌套 let lastWasListItem = false; for (let i = 0; i < dom.length; i++) { const node = dom[i]; const html = this.renderNode(node, listStack); if (html) { // 检测当前是否为列表项 const isCurrentListItem = html.includes('<li '); // 如果上一个是列表项,当前不是列表项,需要关闭列表 if (lastWasListItem && !isCurrentListItem && listStack.length > 0) { const listType = listStack.pop(); if (listType) { htmlParts.push(`</${listType}>`); } } htmlParts.push(html); lastWasListItem = isCurrentListItem; } } // 关闭所有未关闭的列表 while (listStack.length > 0) { const listType = listStack.pop(); if (listType) { htmlParts.push(`</${listType}>`); } } return this.wrapHTML(htmlParts.join('\n')); } /** * 渲染单个节点 */ renderNode(node: any, listStack: string[] = []): string { switch (node.type) { case 'paragraph': return this.renderParagraph(node, listStack); case 'table': return this.renderTable(node); case 'image': return this.renderImage(node); case 'hyperlink': return this.renderHyperlink(node); default: return ''; } } /** * 渲染段落 - 优化版 */ renderParagraph(paragraph: ParagraphNode, listStack: string[]): string { const style = this.cssToString(paragraph.style); let content = this.renderChildren(paragraph.children); console.log(`📝 渲染段落: "${content.substring(0, 50)}...", 样式: ${style}`); console.log(`🔍 段落编号信息:`, paragraph.numbering); // 处理空段落 - 保留空行以改善排版 if (!content || content.trim() === '') { return '<p style="margin: 12px 0; height: 1.2em;">&nbsp;</p>'; } // 1. 优先处理正式编号列表 if (paragraph.numbering) { console.log(`📋 处理正式编号列表: numId=${paragraph.numbering.numId}, level=${paragraph.numbering.level}`); return this.renderFormalList(paragraph, content, style, listStack); } // 2. 检测手动列表项 const manualListItem = this.detectManualListItem(content); if (manualListItem) { console.log(`📋 处理手动列表项: 类型=${manualListItem.listType}, 内容="${manualListItem.content}"`); return this.renderManualListWithCounter(manualListItem, style, listStack); } // 3. 检测是否为标题 const headingLevel = this.detectHeading(paragraph); if (headingLevel) { console.log(`📰 渲染标题: 级别=${headingLevel}`); return `<h${headingLevel} style="${style}">${content}</h${headingLevel}>`; } // 4. 检测是否为代码块 if (this.isCodeBlock(paragraph)) { console.log(`💻 渲染代码块`); return `<pre><code style="${style}">${this.escapeHtml(content)}</code></pre>`; } // 5. 普通段落 console.log(`📄 渲染普通段落`); const defaultStyle = 'margin: 12px 0; line-height: 1.6; padding: 4px 0;'; const combinedStyle = style ? `${defaultStyle} ${style}` : defaultStyle; return `<p style="${combinedStyle}">${content}</p>`; } /** * 渲染正式编号列表 */ private renderFormalList(paragraph: ParagraphNode, content: string, style: string, listStack: string[]): string { const { numId, level } = paragraph.numbering!; const listType = this.getListType(numId, level); const levelNum = parseInt(level); console.log(`🔢 正式列表: 类型=${listType}, 级别=${levelNum}, 当前栈=${listStack.join(',')}`); let result = ''; // 关闭多余的嵌套级别 while (listStack.length > levelNum) { const closingType = listStack.pop(); if (closingType) { result += `</${closingType}>`; console.log(`🔚 关闭列表: ${closingType}`); } } // 检查是否需要开启新列表 if (listStack.length === levelNum) { // 同级别,检查类型是否一致 if (listStack[levelNum - 1] !== listType) { // 类型不同,关闭旧的,开启新的 const oldType = listStack.pop(); result += `</${oldType}>`; // 获取起始序号 const startNum = this.getListStartNumber(numId, level); result += `<${listType} start="${startNum}">`; listStack.push(listType); console.log(`🔄 切换列表类型: ${oldType} -> ${listType}`); } } else { // 新的嵌套级别 const startNum = this.getListStartNumber(numId, level); result += `<${listType} start="${startNum}">`; listStack.push(listType); console.log(`🆕 开启新列表: ${listType}`); } // 简单地添加列表项,让浏览器处理序号 result += `<li style="${style}">${content}</li>`; return result; } /** * 渲染手动列表项 */ private renderManualList(manualListItem: { content: string; listType: 'ul' | 'ol'; level: number }, style: string, listStack: string[]): string { const { content, listType, level } = manualListItem; console.log(`✋ 手动列表: 类型=${listType}, 级别=${level}, 当前栈=${listStack.join(',')}`); let result = ''; // 检查是否需要开启新列表 const needNewList = listStack.length === 0 || listStack[listStack.length - 1] !== listType || listStack.length <= level; if (needNewList) { // 关闭不匹配的列表 while (listStack.length > level) { const closingType = listStack.pop(); if (closingType) { result += `</${closingType}>`; } } // 开启新列表 result += `<${listType}>`; listStack.push(listType); console.log(`🆕 开启手动列表: ${listType}`); } // 添加列表项 result += `<li style="${style}">${content}</li>`; return result; } /** * 渲染带计数器的手动列表项 */ private renderManualListWithCounter(manualListItem: { content: string; listType: 'ul' | 'ol'; level: number }, style: string, listStack: string[]): string { const { content, listType, level } = manualListItem; console.log(`✋ 手动列表: 类型=${listType}, 级别=${level}, 当前栈=${listStack.join(',')}`); let result = ''; // 关闭不匹配的列表 while (listStack.length > level) { const closingType = listStack.pop(); if (closingType) { result += `</${closingType}>`; } } // 检查是否需要开启新列表 if (listStack.length === 0 || listStack[listStack.length - 1] !== listType || listStack.length <= level) { result += `<${listType}>`; listStack.push(listType); console.log(`🆕 开启手动列表: ${listType}`); } // 简单地添加列表项,让浏览器处理序号 result += `<li style="${style}">${content}</li>`; return result; } /** * 渲染子元素 */ renderChildren(children: ChildNode[]): string { return children.map(child => { if (child.type === 'run' || child.type === 'break') { return this.renderRun(child); } else if (child.type === 'hyperlink') { return this.renderHyperlink(child); } else if (child.type === 'image') { return this.renderImage(child); } return ''; }).join(''); } /** * 渲染文本运行 */ renderRun(run: RunNode): string { if (!run.text && run.text !== '0') return ''; const style = this.cssToString(run.style); let text = this.escapeHtml(run.text); // 改善空格和换行处理 // 保留连续空格 text = text.replace(/ {2,}/g, match => '&nbsp;'.repeat(match.length)); // 保留制表符 text = text.replace(/\t/g, '&nbsp;&nbsp;&nbsp;&nbsp;'); // 处理换行符 text = text.replace(/\n/g, '<br>'); // 保留行首空格 text = text.replace(/^\s+/gm, match => '&nbsp;'.repeat(match.length)); if (style) { return `<span style="${style}">${text}</span>`; } return text; } /** * 渲染超链接 */ renderHyperlink(hyperlink: HyperlinkNode): string { const content = this.renderChildren(hyperlink.children); if (hyperlink.href) { return `<a href="${this.escapeHtml(hyperlink.href)}">${content}</a>`; } return content; } /** * 渲染表格 */ renderTable(table: TableNode): string { const style = this.cssToString(table.style); const rows = table.rows.map(row => this.renderTableRow(row)).join('\n'); return `<table style="${style}">\n${rows}\n</table>`; } /** * 渲染表格行 */ renderTableRow(row: TableRowNode): string { const style = this.cssToString(row.style); const cells = row.cells.map(cell => this.renderTableCell(cell)).join(''); return `<tr style="${style}">${cells}</tr>`; } /** * 渲染表格单元格 */ renderTableCell(cell: TableCellNode): string { if (cell.rowspan === 0) return ''; // 被合并的单元格不渲染 const style = this.cssToString(cell.style); const content = cell.children.map(child => this.renderNode(child)).join(''); let attrs = `style="${style}"`; if (cell.colspan > 1) attrs += ` colspan="${cell.colspan}"`; if (cell.rowspan > 1) attrs += ` rowspan="${cell.rowspan}"`; return `<td ${attrs}>${content || '&nbsp;'}</td>`; } /** * 渲染图片 - 使用相对路径 */ renderImage(image: ImageNode): string { if (!image.src) { console.log('⚠️ 图片源为空,跳过渲染'); return ''; } const style = this.cssToString(image.style); const alt = this.escapeHtml(image.alt || '图片'); // 直接使用相对路径,因为图片已经被复制到输出目录 const imageSrc = image.src; console.log(`🖼️ 渲染图片: src=${imageSrc}, alt=${alt}, style=${style}`); return `<img src="${imageSrc}" alt="${alt}" style="${style}" loading="lazy" />`; } /** * 检测手动列表项(以项目符号开头的段落)- 增强版 */ detectManualListItem(content: string): { content: string; listType: 'ul' | 'ol'; level: number } | null { if (!content || content.trim() === '') return null; const trimmedContent = content.trim(); console.log(`🔍 检测列表项: "${trimmedContent}"`); // 检测各种项目符号 - 增强版 const bulletPatterns = [ // 无序列表符号 { pattern: /^[•●○◆◇■□▪▫⁃‣⁌⁍]\s*(.+)$/, type: 'ul' as const, level: 0 }, { pattern: /^[·]\s*(.+)$/, type: 'ul' as const, level: 0 }, { pattern: /^[-*+]\s*(.+)$/, type: 'ul' as const, level: 0 }, // 有序列表符号 { pattern: /^\d+[.)、]\s*(.+)$/, type: 'ol' as const, level: 0 }, { pattern: /^[a-zA-Z][.)、]\s*(.+)$/, type: 'ol' as const, level: 0 }, { pattern: /^[①②③④⑤⑥⑦⑧⑨⑩]\s*(.+)$/, type: 'ol' as const, level: 0 }, { pattern: /^[一二三四五六七八九十]、\s*(.+)$/, type: 'ol' as const, level: 0 }, // Word 特有格式 { pattern: /^\s*[•]\s+(.+)$/, type: 'ul' as const, level: 0 }, { pattern: /^\s*\d+\.\s+(.+)$/, type: 'ol' as const, level: 0 }, // 带缩进的列表项 { pattern: /^\s{2,}[•●○]\s*(.+)$/, type: 'ul' as const, level: 1 }, { pattern: /^\s{2,}\d+[.)、]\s*(.+)$/, type: 'ol' as const, level: 1 }, // 考勤时间特有格式 { pattern: /^工作时间[::]\s*(.+)$/, type: 'ul' as const, level: 0 }, { pattern: /^午休时间[::]\s*(.+)$/, type: 'ul' as const, level: 0 }, { pattern: /^因工作安排[,,]\s*(.+)$/, type: 'ul' as const, level: 0 } ]; for (const { pattern, type, level } of bulletPatterns) { const match = trimmedContent.match(pattern); if (match) { console.log(`✅ 匹配到列表项: 类型=${type}, 级别=${level}, 内容="${match[1].trim()}"`); return { content: match[1].trim(), listType: type, level: level }; } } console.log(`❌ 未匹配到列表项`); return null; } /** * 检测是否为代码块 */ isCodeBlock(paragraph: ParagraphNode): boolean { // 检查是否使用等宽字体 const fontFamily = paragraph.style['font-family']; if (fontFamily && /Consolas|Courier|Monaco|monospace/i.test(fontFamily)) { return true; } // 检查是否有代码块背景色 const bgColor = paragraph.style['background-color']; if (bgColor && /^#[fF][0-9a-fA-F]{5}$/.test(bgColor)) { return true; } return false; } /** * 检测标题级别 - 增强版 */ detectHeading(paragraph: ParagraphNode): number | null { // 检查是否有标题样式 const hasHeadingStyle = this.hasHeadingStyle(paragraph); if (hasHeadingStyle) { return hasHeadingStyle; } // 根据字体大小和粗体判断 const fontSize = paragraph.style['font-size']; const isBold = paragraph.style['font-weight'] === 'bold'; if (fontSize) { const size = parseFloat(fontSize); // 调整标题判断逻辑 if (size >= 22 && isBold) return 1; if (size >= 18 && isBold) return 2; if (size >= 16 && isBold) return 3; if (size >= 14 && isBold) return 4; if (size >= 12 && isBold) return 5; } // 检查子元素中是否有大字体粗体文本 for (const child of paragraph.children) { if (child.type === 'run') { const childFontSize = child.style['font-size']; const childIsBold = child.style['font-weight'] === 'bold'; if (childFontSize && childIsBold) { const size = parseFloat(childFontSize); if (size >= 22) return 1; if (size >= 18) return 2; if (size >= 16) return 3; if (size >= 14) return 4; if (size >= 12) return 5; } } } return null; } /** * 检查是否有标题样式 */ hasHeadingStyle(paragraph: ParagraphNode): number | null { // 这里可以根据样式名称判断是否为标题 // 例如检查样式ID是否包含"Heading"等 return null; } /** * 生成CSS样式表 */ generateCSS(): string { const css: string[] = []; // 基础样式 - 优化排版 css.push(` body { font-family: 'Microsoft YaHei', 'SimHei', 'Calibri', 'Arial', sans-serif; line-height: 1.5; margin: 20px; padding: 0; word-wrap: break-word; word-break: break-all; } p { margin: 12px 0; line-height: 1.6; text-align: justify; word-spacing: 0.1em; padding: 4px 0; } h1, h2, h3, h4, h5, h6 { margin: 20px 0 12px 0; line-height: 1.4; font-weight: bold; padding: 6px 0; } table { border-collapse: collapse; width: 100%; margin: 10px 0; } td, th { padding: 8px 12px; border: 1px solid #ddd; vertical-align: top; word-wrap: break-word; } pre { background-color: #f8f9fa; padding: 12px; border-radius: 4px; overflow-x: auto; margin: 10px 0; border-left: 4px solid #007acc; } code { font-family: 'Consolas', 'Monaco', 'Courier New', monospace; background-color: #f1f3f4; padding: 2px 4px; border-radius: 3px; } ul, ol { margin: 8px 0; padding-left: 24px; list-style-position: outside; display: block; } ul { list-style-type: disc; } ol { list-style-type: decimal; } /* 确保嵌套列表正确显示 */ ol ol { list-style-type: lower-alpha; } ol ol ol { list-style-type: lower-roman; } li { margin: 4px 0; line-height: 1.6; padding: 2px 0; display: list-item; word-wrap: break-word; overflow-wrap: break-word; } li p { margin: 0; padding: 0; display: inline; } blockquote { margin: 16px 0; padding: 12px 20px; border-left: 4px solid #ddd; background-color: #f9f9f9; font-style: italic; line-height: 1.6; } `); // 添加文档中定义的样式 for (const [styleId, styleDef] of Object.entries(this.styles)) { if (styleDef.css && Object.keys(styleDef.css).length > 0) { const selector = `.${styleId}`; const rules = this.cssToString(styleDef.css); css.push(`${selector} { ${rules} }`); } } return css.join('\n'); } /** * 包装HTML */ wrapHTML(content: string): string { return `<!DOCTYPE html> <html lang="zh-CN"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Converted Document</title> <style>${this.generateCSS()}</style> </head> <body> ${content} </body> </html>`; } // === 工具方法 === /** * 获取ZIP文件内容 */ async getFileContent(zip: any, path: string): Promise<string | null> { if (zip.files[path]) { return zip.file(path).async('string'); } return null; } /** * 解析XML */ parseXml(xml: string): Promise<any> { return new Promise((resolve, reject) => { this.parser.parseString(xml, (err: any, result: any) => { if (err) reject(err); else resolve(result); }); }); } /** * CSS对象转字符串 */ cssToString(cssObj: Record<string, string>): string { if (!cssObj || Object.keys(cssObj).length === 0) return ''; return Object.entries(cssObj) .filter(([key, value]) => value != null && value !== '') .map(([key, value]) => `${key}: ${value}`) .join('; '); } /** * Twip转像素 */ twipToPixel(twip: string | number): number { return Math.round(parseInt(String(twip)) / 20); } /** * 八分之一点转像素 */ eighthPointToPixel(eighthPoint: string | number): number { return Math.round(parseInt(String(eighthPoint)) / 8); } /** * EMU转像素 */ emuToPixel(emu: string | number): number { return Math.round(parseInt(String(emu)) / 9525); } /** * 映射对齐方式 */ mapAlignment(align: string): string { const map: Record<string, string> = { 'left': 'left', 'center': 'center', 'right': 'right', 'both': 'justify', 'justify': 'justify' }; return map[align] || 'left'; } /** * 映射边框样式 */ mapBorderStyle(style: string): string { const map: Record<string, string> = { 'single': 'solid', 'double': 'double', 'dotted': 'dotted', 'dashed': 'dashed', 'none': 'none' }; return map[style] || 'solid'; } /** * 映射高亮颜色 */ mapHighlightColor(color: string): string { const map: Record<string, string> = { 'yellow': '#FFFF00', 'green': '#00FF00', 'cyan': '#00FFFF', 'magenta': '#FF00FF', 'blue': '#0000FF', 'red': '#FF0000', 'darkBlue': '#000080', 'darkCyan': '#008080', 'darkGreen': '#008000', 'darkMagenta': '#800080', 'darkRed': '#800000', 'darkYellow': '#808000', 'darkGray': '#808080', 'lightGray': '#C0C0C0', 'black': '#000000' }; return map[color] || '#FFFF00'; } /** * 转义HTML特殊字符 */ escapeHtml(text: string): string { const map: Record<string, string> = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#039;' }; return text.replace(/[&<>"']/g, char => map[char]); } /** * 获取列表起始序号 */ getListStartNumber(numId: string, level: string): number { const abstractId = this.numberingCache.get(numId); if (abstractId && this.numbering[abstractId]) { const levelDef = this.numbering[abstractId][level]; if (levelDef && levelDef.start) { const startNum = parseInt(levelDef.start); return isNaN(startNum) ? 1 : startNum; } } return 1; } // 移除了formatListNumber和toRoman方法,现在使用HTML原生列表功能 /** * 获取列表类型 - 增强版 */ getListType(numId: string, level: string): string { console.log(`🔍 获取列表类型: numId=${numId}, level=${level}`); const abstractId = this.numberingCache.get(numId); console.log(`📋 抽象ID映射: ${numId} -> ${abstractId}`); if (abstractId && this.numbering[abstractId]) { const levelDef = this.numbering[abstractId][level]; console.log(`📊 级别定义:`, levelDef); if (levelDef) { const numFmt = levelDef.numFmt; const text = levelDef.text; console.log(`🎯 编号格式: numFmt=${numFmt}, text="${text}"`); // 扩展项目符号类型检测 const isBullet = numFmt === 'bullet' || numFmt === 'disc' || numFmt === 'circle' || numFmt === 'square' || text === '·' || text === '•' || text === '○' || text === '■' || text === '' || text === '%1.' || text.includes('●') || text.includes('◆') || text.includes('▪') || text.includes('◇') || text.includes('□'); const listType = isBullet ? 'ul' : 'ol'; console.log(`✅ 确定列表类型: ${listType}`); return listType; } } else { console.log(`⚠️ 未找到编号定义,使用默认无序列表`); return 'ul'; } console.log(`📝 默认返回有序列表`); return 'ol'; } } // 导出转换函数 export async function convertDocxToHtmlWithOOXML(inputPath: string, options: any = {}): Promise<any> { try { const parser = new OOXMLParser(); console.log('🚀 开始自定义OOXML解析器转换...'); console.log('📁 输入文件:', inputPath); console.log('📂 输出目录:', options.outputDir || '未指定'); // 确保有输出目录 const outputDir = options.outputDir || path.dirname(inputPath); console.log('📂 实际使用的输出目录:', outputDir); const result = await parser.parseDocx(inputPath, outputDir); console.log('✅ 自定义OOXML解析器转换完成'); console.log('📊 媒体文件数量:', Object.keys(result.media).length); return { success: true, content: result.html, css: result.css, media: result.media, metadata: { format: 'html', originalFormat: 'docx', converter: 'custom-ooxml-parser', stylesPreserved: true, standalone: true, conversionTime: Date.now(), mediaCount: Object.keys(result.media).length } }; } catch (error: any) { console.error('❌ 自定义OOXML解析器转换失败:', error); return { success: false, error: error.message, metadata: { converter: 'custom-ooxml-parser', failed: true } }; } } export { OOXMLParser };

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Tele-AI/doc-ops-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server