PromptX

word-tool.tool.js•17.3 kB

/** * Word Tool - Word 文档读写工具 * * 战略意义： * 1. AI 文档理解：让 AI 能够读取和理解 Word 文档内容 * 2. 内容生成能力：让 AI 能够创建结构化的 Word 文档 * 3. 图片提取支持：完整提取文档中的图片资源 * 4. 格式保留：读取时转换为 Markdown 保留文档结构 * * 设计理念： * - read：提取文本和图片，转换为 AI 友好的格式 * - write：从结构化数据生成专业 Word 文档 * - 智能缓存：避免重复解析同一文档 * * 生态定位： * 对标 pdf-reader，为 AI 提供 Word 文档处理能力 * 与其他文档工具（pdf-reader、excel-tool）形成完整的办公文档生态 */ module.exports = { getDependencies() { return { 'mammoth': '^1.8.0', // 读取 docx 'docx': '^9.5.0', // 写入 docx 'pizzip': '^3.2.0' // 修改 docx }; }, getMetadata() { return { id: 'word-tool', name: 'Word Tool', description: 'Word 文档读写修改工具，支持文本提取、图片提取、文档生成和文本替换', version: '1.0.0', author: '鲁班' }; }, getSchema() { return { parameters: { type: 'object', properties: { action: { type: 'string', enum: ['read', 'write', 'modify'], description: '操作类型：read（读取文档）、write（创建文档）或 modify（修改文档）' }, // read 参数 docPath: { type: 'string', description: 'Word 文档的绝对路径（read 时必需）', minLength: 1 }, extractImages: { type: 'boolean', description: '是否提取图片（read 时可选，默认 true）', default: true }, format: { type: 'string', enum: ['markdown', 'html', 'text'], description: '输出格式（read 时可选，默认 markdown）', default: 'markdown' }, forceRefresh: { type: 'boolean', description: '强制重新解析，忽略缓存（read 时可选，默认 false）', default: false }, // write 参数 outputPath: { type: 'string', description: '输出文档路径（write 时必需）', minLength: 1 }, content: { type: 'object', description: '文档内容（write 时必需）', properties: { title: { type: 'string', description: '文档标题' }, sections: { type: 'array', description: '文档章节列表', items: { type: 'object', properties: { heading: { type: 'string', description: '章节标题' }, level: { type: 'number', description: '标题级别（1-6）', minimum: 1, maximum: 6 }, paragraphs: { type: 'array', description: '段落文本列表', items: { type: 'string' } } } } } } }, // modify 参数 replacements: { type: 'array', description: '文本替换规则列表（modify 时必需）', items: { type: 'object', properties: { find: { type: 'string', description: '要查找的文本（支持正则表达式）', minLength: 1 }, replace: { type: 'string', description: '替换后的文本' }, useRegex: { type: 'boolean', description: '是否使用正则表达式（默认 false）', default: false } }, required: ['find', 'replace'] }, minItems: 1 } }, required: ['action'] } }; }, getBridges() { return { 'word:read': { real: async (args, api) => { api.logger.info('[Bridge] Reading Word document with mammoth'); const mammoth = await api.importx('mammoth'); if (args.format === 'markdown') { return await mammoth.convertToMarkdown({ path: args.docPath }, args.options || {}); } else if (args.format === 'html') { return await mammoth.convertToHtml({ path: args.docPath }, args.options || {}); } else { return await mammoth.extractRawText({ path: args.docPath }); } }, mock: async (args, api) => { api.logger.debug('[Mock] Mock reading Word document'); return { value: '# Mock Document\n\nThis is mock content from a Word document.\n\n## Section 1\n\nParagraph text here.', messages: [] }; } }, 'word:write': { real: async (args, api) => { api.logger.info('[Bridge] Writing Word document with docx'); const { Document, Packer, Paragraph, HeadingLevel, TextRun } = await api.importx('docx'); const children = []; // 添加标题 if (args.content.title) { children.push( new Paragraph({ text: args.content.title, heading: HeadingLevel.HEADING_1 }) ); children.push(new Paragraph({ text: '' })); // 空行 } // 添加章节 if (args.content.sections) { for (const section of args.content.sections) { // 章节标题 if (section.heading) { const headingLevel = `HEADING_${section.level || 2}`; children.push( new Paragraph({ text: section.heading, heading: HeadingLevel[headingLevel] || HeadingLevel.HEADING_2 }) ); } // 章节段落 if (section.paragraphs) { for (const para of section.paragraphs) { children.push( new Paragraph({ children: [new TextRun(para)] }) ); } } children.push(new Paragraph({ text: '' })); // 章节后空行 } } const doc = new Document({ sections: [{ properties: {}, children: children }] }); return await Packer.toBuffer(doc); }, mock: async (args, api) => { api.logger.debug('[Mock] Mock writing Word document'); return Buffer.from('mock-docx-binary-data'); } }, 'word:modify': { real: async (args, api) => { api.logger.info('[Bridge] Modifying Word document with PizZip'); const PizZip = await api.importx('pizzip'); // 解压 docx const zip = new PizZip(args.docBuffer, { binary: true }); // 读取 document.xml let docXml = zip.file('word/document.xml').asText(); // 应用所有替换规则 for (const rule of args.replacements) { if (rule.useRegex) { const regex = new RegExp(rule.find, 'g'); docXml = docXml.replace(regex, rule.replace); } else { // 简单文本替换 const regex = new RegExp(rule.find.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g'); docXml = docXml.replace(regex, rule.replace); } } // 更新 zip zip.file('word/document.xml', docXml); // 生成新的 docx return zip.generate({ type: 'nodebuffer', compression: 'DEFLATE' }); }, mock: async (args, api) => { api.logger.debug('[Mock] Mock modifying Word document'); return Buffer.from('mock-modified-docx-data'); } }, 'fs:readFile': { real: async (args, api) => { api.logger.info(`[Bridge] Reading file: ${args.path}`); const fs = await api.importx('fs'); return await fs.promises.readFile(args.path); }, mock: async (args, api) => { api.logger.debug(`[Mock] Mock reading file: ${args.path}`); return Buffer.from('mock-docx-binary-data'); } }, 'fs:writeFile': { real: async (args, api) => { api.logger.info(`[Bridge] Writing file: ${args.path}`); const fs = await api.importx('fs'); await fs.promises.writeFile(args.path, args.data); }, mock: async (args, api) => { api.logger.debug(`[Mock] Mock writing file: ${args.path}`); } }, 'fs:mkdir': { real: async (args, api) => { api.logger.info(`[Bridge] Creating directory: ${args.path}`); const fs = await api.importx('fs'); await fs.promises.mkdir(args.path, { recursive: true }); }, mock: async (args, api) => { api.logger.debug(`[Mock] Mock creating directory: ${args.path}`); } }, 'crypto:hash': { real: async (args, api) => { const crypto = await api.importx('crypto'); const hash = crypto.createHash('md5'); hash.update(args.data); return hash.digest('hex'); }, mock: async (args, api) => { api.logger.debug('[Mock] Generating mock hash'); return 'mock-hash-' + Date.now(); } }, 'path:join': { real: async (args, api) => { const path = await api.importx('path'); return path.join(...args.parts); }, mock: async (args, api) => { return args.parts.join('/'); } } }; }, getBridgeErrors() { return { 'word:read': [ { code: 'INVALID_DOCX', match: /not a valid zip file|End of central directory/i, solution: '检查文件是否是有效的 .docx 文档', retryable: false } ], 'fs:readFile': [ { code: 'FILE_NOT_FOUND', match: /ENOENT/, solution: '检查文件路径是否正确', retryable: false } ] }; }, async execute(params) { const { api } = this; const { action } = params; try { if (action === 'read') { return await this.handleRead(params); } else if (action === 'write') { return await this.handleWrite(params); } else if (action === 'modify') { return await this.handleModify(params); } else { throw new Error(`未知的操作类型: ${action}`); } } catch (error) { api.logger.error('Word 文档处理失败', error); throw error; } }, async handleRead(params) { const { api } = this; const { docPath, extractImages = true, format = 'markdown', forceRefresh = false } = params; api.logger.info('开始读取 Word 文档', { docPath, extractImages, format, forceRefresh }); // 读取文档文件 const docBuffer = await api.bridge.execute('fs:readFile', { path: docPath }); api.logger.info('文档文件读取成功', { size: docBuffer.length }); // 生成文档 hash const docHash = await api.bridge.execute('crypto:hash', { data: docBuffer }); // 获取工具信息，构建文档数据目录 const toolInfo = api.getInfo(); const docDir = await api.bridge.execute('path:join', { parts: [toolInfo.sandboxPath, 'docs', docHash] }); api.logger.info('文档数据目录', { docDir }); // 检查缓存 const allDocs = api.storage.getItem('docs') || {}; let docMetadata = allDocs[docHash]; if (!docMetadata || forceRefresh) { api.logger.info('首次处理此文档，解析内容'); // 创建文档目录 await api.bridge.execute('fs:mkdir', { path: docDir }); // 配置图片提取 let imageCount = 0; const images = []; const convertImageOptions = extractImages ? { convertImage: async (element) => { const imageBuffer = await element.read('base64'); const imgType = element.contentType.split('/').pop(); const imgName = `image-${imageCount}.${imgType}`; const imgPath = await api.bridge.execute('path:join', { parts: [docDir, imgName] }); // 保存图片 await api.bridge.execute('fs:writeFile', { path: imgPath, data: Buffer.from(imageBuffer, 'base64') }); images.push({ index: imageCount, path: imgPath, type: imgType }); imageCount++; return { src: imgPath }; } } : {}; // 读取文档内容 const result = await api.bridge.execute('word:read', { docPath, format, options: convertImageOptions }); // 缓存元信息 docMetadata = { docHash, docPath, format, imageCount, hasImages: imageCount > 0, createdAt: Date.now() }; allDocs[docHash] = docMetadata; api.storage.setItem('docs', allDocs); api.logger.info('文档解析完成', { imageCount, contentLength: result.value.length }); return { success: true, metadata: { docHash, format, imageCount, hasImages: imageCount > 0 }, content: { text: result.value, images, imagesDirectory: docDir }, cache: { fromCache: false } }; } else { api.logger.info('使用缓存的文档数据'); // 从缓存读取（重新解析文本，图片路径从文件系统读取） const result = await api.bridge.execute('word:read', { docPath, format }); return { success: true, metadata: { docHash: docMetadata.docHash, format: docMetadata.format, imageCount: docMetadata.imageCount, hasImages: docMetadata.hasImages }, content: { text: result.value, imagesDirectory: docDir }, cache: { fromCache: true } }; } }, async handleWrite(params) { const { api } = this; const { outputPath, content } = params; if (!outputPath) { throw new Error('write 操作需要提供 outputPath 参数'); } if (!content) { throw new Error('write 操作需要提供 content 参数'); } api.logger.info('开始创建 Word 文档', { outputPath }); // 生成文档 const buffer = await api.bridge.execute('word:write', { content }); // 保存文件 await api.bridge.execute('fs:writeFile', { path: outputPath, data: buffer }); api.logger.info('Word 文档创建成功', { outputPath, size: buffer.length }); return { success: true, outputPath, size: buffer.length, message: 'Word 文档创建成功' }; }, async handleModify(params) { const { api } = this; const { docPath, outputPath, replacements } = params; if (!docPath) { throw new Error('modify 操作需要提供 docPath 参数'); } if (!replacements || replacements.length === 0) { throw new Error('modify 操作需要提供 replacements 参数'); } const finalOutputPath = outputPath || docPath; // 不提供 outputPath 则覆盖原文件 api.logger.info('开始修改 Word 文档', { docPath, outputPath: finalOutputPath, replacementCount: replacements.length }); // 读取原文档 const docBuffer = await api.bridge.execute('fs:readFile', { path: docPath }); api.logger.info('文档文件读取成功', { size: docBuffer.length }); // 修改文档 const modifiedBuffer = await api.bridge.execute('word:modify', { docBuffer, replacements }); api.logger.info('文档修改完成', { newSize: modifiedBuffer.length }); // 保存修改后的文档 await api.bridge.execute('fs:writeFile', { path: finalOutputPath, data: modifiedBuffer }); api.logger.info('修改后的文档已保存', { path: finalOutputPath }); return { success: true, inputPath: docPath, outputPath: finalOutputPath, replacementCount: replacements.length, size: modifiedBuffer.length, message: '文档修改成功' }; } };

Loading blob content...

Latest Blog Posts

Don't Use Large Strings as Cache Keys
By punkpeye on January 11, 2026.
markdown
node-js
cache
What are Claude Skills?
By punkpeye on January 10, 2026.
mcp
skills
How to Test MCP Streamable HTTP Endpoints Using cURL
By punkpeye on January 2, 2026.
tutorial
bash

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Deepractice/PromptX'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

word-tool.tool.js•17.3 kB