analyze_structure
Analyze document structure, headings, and formatting elements in .docx files to understand organization and layout.
Instructions
Analyze document structure, headings, and formatting elements
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| file_path | Yes | Path to the .docx file | |
Implementation Reference
// src/index.ts:143-217 (handler)
// The handler function for the 'analyze_structure' tool. It processes a DOCX file by converting it
// to HTML using mammoth, extracts headings, paragraphs, formatting elements, and computes document
// statistics.
/**
 * Analyzes the structure of the .docx file named by `file_path`.
 *
 * The path is resolved with `path.resolve`. On success the result carries a single text item
 * holding the pretty-printed JSON analysis (`document_stats`, `structure`, `formatting`,
 * `messages`). Failures are never thrown to the caller: they are reported as a text item
 * prefixed with "Error analyzing structure: " together with `isError: true`.
 */
async ({ file_path }) => {
  try {
    const absolutePath = path.resolve(file_path)
    if (!fs.existsSync(absolutePath)) {
      throw new Error(`File not found: ${absolutePath}`)
    }

    // The HTML rendering (used for structure) and the raw text (used for statistics) are
    // independent conversions of the same file, so request both at once.
    const [htmlResult, textResult] = await Promise.all([
      mammoth.convertToHtml({ path: absolutePath }),
      mammoth.extractRawText({ path: absolutePath }),
    ])
    const html = htmlResult.value
    const text = textResult.value

    // Counts elements by their opening tag. Lazily matching whole `<tag>...</tag>` spans pairs
    // an outer opening tag with the first inner closing tag, which undercounts nested elements
    // (a list inside a list item, for example). The lookahead requires the tag name to end
    // there, so `p` does not also match `<pre>` and `li` does not match `<link>`.
    const countElements = (tagNames: string): number =>
      (html.match(new RegExp(`<(?:${tagNames})(?=[\\s/>])[^>]*>`, 'gi')) || [])
        .length

    // Restores the characters replaced by HTML text escaping. `&amp;` is decoded last so an
    // escaped literal such as `&amp;lt;` becomes `&lt;` rather than `<`.
    const decodeEntities = (value: string): string =>
      value.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&')

    // `\1` ties the closing tag to the level of the opening tag, and `[\s\S]` lets heading
    // content span line breaks.
    const headingPattern = /<h([1-6])(?=[\s>])[^>]*>([\s\S]*?)<\/h\1>/gi
    const headings: { level: number; text: string }[] = []
    for (
      let match = headingPattern.exec(html);
      match !== null;
      match = headingPattern.exec(html)
    ) {
      const [, levelDigit = '', innerHtml = ''] = match
      headings.push({
        level: Number.parseInt(levelDigit, 10),
        // Strip inline markup first, then decode, so decoded `<` is never mistaken for a tag.
        text: decodeEntities(innerHtml.replace(/<[^>]*>/g, '')).trim(),
      })
    }

    const analysis = {
      document_stats: {
        total_characters: text.length,
        total_words: text
          .split(/\s+/)
          .filter((word: string) => word.length > 0).length,
        total_paragraphs: countElements('p'),
        total_headings: headings.length,
      },
      structure: {
        headings,
        // Distinct heading levels in ascending numeric order.
        heading_levels: [
          ...new Set(headings.map((heading) => heading.level)),
        ].sort((a, b) => a - b),
      },
      formatting: {
        bold_elements: countElements('strong'),
        italic_elements: countElements('em'),
        lists: countElements('ul|ol'),
        list_items: countElements('li'),
      },
      messages: htmlResult.messages,
    }

    return {
      content: [
        {
          type: 'text',
          text: JSON.stringify(analysis, null, 2),
        },
      ],
    }
  } catch (error) {
    // Any value can be thrown, so only read `.message` from genuine Error instances.
    const reason = error instanceof Error ? error.message : String(error)
    return {
      content: [
        {
          type: 'text',
          text: `Error analyzing structure: ${reason}`,
        },
      ],
      isError: true,
    }
  }
}
- src/index.ts:140-142 (schema) Input schema for the 'analyze_structure' tool, requiring a file_path parameter. `{ file_path: z.string().describe('Path to the .docx file'), },`
// src/index.ts:137-218 (registration)
// Registration of the 'analyze_structure' tool using server.tool(), including name, description,
// input schema, and inline handler function.
server.tool(
  'analyze_structure',
  'Analyze document structure, headings, and formatting elements',
  {
    file_path: z.string().describe('Path to the .docx file'),
  },
  // Handler: resolves `file_path`, converts the document with mammoth, and returns the analysis
  // as pretty-printed JSON in a single text item. Failures are reported as a text item prefixed
  // with "Error analyzing structure: " plus `isError: true` instead of being thrown.
  async ({ file_path }) => {
    try {
      const absolutePath = path.resolve(file_path)
      if (!fs.existsSync(absolutePath)) {
        throw new Error(`File not found: ${absolutePath}`)
      }

      // The HTML rendering (used for structure) and the raw text (used for statistics) are
      // independent conversions of the same file, so request both at once.
      const [htmlResult, textResult] = await Promise.all([
        mammoth.convertToHtml({ path: absolutePath }),
        mammoth.extractRawText({ path: absolutePath }),
      ])
      const html = htmlResult.value
      const text = textResult.value

      // Counts elements by their opening tag. Lazily matching whole `<tag>...</tag>` spans
      // pairs an outer opening tag with the first inner closing tag, which undercounts nested
      // elements (a list inside a list item, for example). The lookahead requires the tag name
      // to end there, so `p` does not also match `<pre>` and `li` does not match `<link>`.
      const countElements = (tagNames: string): number =>
        (html.match(new RegExp(`<(?:${tagNames})(?=[\\s/>])[^>]*>`, 'gi')) || [])
          .length

      // Restores the characters replaced by HTML text escaping. `&amp;` is decoded last so an
      // escaped literal such as `&amp;lt;` becomes `&lt;` rather than `<`.
      const decodeEntities = (value: string): string =>
        value.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&')

      // `\1` ties the closing tag to the level of the opening tag, and `[\s\S]` lets heading
      // content span line breaks.
      const headingPattern = /<h([1-6])(?=[\s>])[^>]*>([\s\S]*?)<\/h\1>/gi
      const headings: { level: number; text: string }[] = []
      for (
        let match = headingPattern.exec(html);
        match !== null;
        match = headingPattern.exec(html)
      ) {
        const [, levelDigit = '', innerHtml = ''] = match
        headings.push({
          level: Number.parseInt(levelDigit, 10),
          // Strip inline markup first, then decode, so decoded `<` is never mistaken for a tag.
          text: decodeEntities(innerHtml.replace(/<[^>]*>/g, '')).trim(),
        })
      }

      const analysis = {
        document_stats: {
          total_characters: text.length,
          total_words: text
            .split(/\s+/)
            .filter((word: string) => word.length > 0).length,
          total_paragraphs: countElements('p'),
          total_headings: headings.length,
        },
        structure: {
          headings,
          // Distinct heading levels in ascending numeric order.
          heading_levels: [
            ...new Set(headings.map((heading) => heading.level)),
          ].sort((a, b) => a - b),
        },
        formatting: {
          bold_elements: countElements('strong'),
          italic_elements: countElements('em'),
          lists: countElements('ul|ol'),
          list_items: countElements('li'),
        },
        messages: htmlResult.messages,
      }

      return {
        content: [
          {
            type: 'text',
            text: JSON.stringify(analysis, null, 2),
          },
        ],
      }
    } catch (error) {
      // Any value can be thrown, so only read `.message` from genuine Error instances.
      const reason = error instanceof Error ? error.message : String(error)
      return {
        content: [
          {
            type: 'text',
            text: `Error analyzing structure: ${reason}`,
          },
        ],
        isError: true,
      }
    }
  }
)