validate_document_footnotes
Check footnotes in Word documents for coherence and compliance. Identify ID conflicts, orphaned content, and missing styles to ensure proper formatting and structure.
Instructions
Validate all footnotes in document for coherence and compliance. Returns detailed report on ID conflicts, orphaned content, missing styles, etc.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| filename | Yes | Path to the Word document to validate. | |
Implementation Reference
# Core handler function that performs comprehensive footnote validation by
# parsing document.xml, footnotes.xml, checking ID coherence, orphans, invalid
# locations, styles, relationships, and content types.
def validate_document_footnotes(filename: str) -> Tuple[bool, str, Dict[str, Any]]:
    """Validate all footnotes in a document for coherence and compliance.

    The .docx package is opened as a zip archive and the following parts are
    inspected: ``word/document.xml`` (references), ``word/header*.xml`` and
    ``word/footer*.xml`` (references in invalid locations),
    ``word/footnotes.xml`` (content), ``word/_rels/document.xml.rels``,
    ``[Content_Types].xml`` and ``word/styles.xml``.

    Args:
        filename: Path to the Word document to validate.

    Returns:
        A ``(is_valid, message, report)`` tuple. ``report`` is a dict with the
        keys ``total_references``, ``total_content``, ``id_conflicts``,
        ``orphaned_content``, ``missing_references``, ``invalid_locations``,
        ``missing_styles`` and ``coherence_issues``. If the file does not
        exist the report is an empty dict. If any error occurs while reading
        or parsing the package, ``is_valid`` is ``False``, the message carries
        the error text and the partially filled report is returned.
    """
    if not os.path.exists(filename):
        return False, f"File not found: {filename}", {}

    report = {
        'total_references': 0,
        'total_content': 0,
        'id_conflicts': [],
        'orphaned_content': [],
        'missing_references': [],
        'invalid_locations': [],
        'missing_styles': [],
        'coherence_issues': []
    }

    try:
        with zipfile.ZipFile(filename, 'r') as zf:
            # Build the member lookup once instead of re-listing per check.
            part_names = set(zf.namelist())
            nsmap = {'w': W_NS}
            id_attr = f'{{{W_NS}}}id'
            hdr_ftr_tags = (f'{{{W_NS}}}hdr', f'{{{W_NS}}}ftr')
            # IDs -1 and 0 are the separator / continuation-separator notes.
            separator_ids = {'-1', '0'}

            # --- References in the main document body -----------------------
            doc_root = etree.fromstring(zf.read('word/document.xml'))
            ref_ids = set()
            for ref in doc_root.xpath('//w:footnoteReference', namespaces=nsmap):
                ref_id = ref.get(id_attr)
                if not ref_id:
                    continue
                ref_ids.add(ref_id)
                report['total_references'] += 1

                # A reference nested under a header/footer element is invalid.
                parent = ref.getparent()
                while parent is not None:
                    if parent.tag in hdr_ftr_tags:
                        report['invalid_locations'].append(ref_id)
                        break
                    parent = parent.getparent()

            # --- References inside header/footer parts ----------------------
            # Headers and footers are stored as separate parts, so the
            # ancestor walk over document.xml alone cannot see them.
            for part in sorted(part_names):
                if not part.endswith('.xml'):
                    continue
                if not part.startswith(('word/header', 'word/footer')):
                    continue
                part_root = etree.fromstring(zf.read(part))
                for ref in part_root.xpath('//w:footnoteReference', namespaces=nsmap):
                    ref_id = ref.get(id_attr)
                    if ref_id:
                        report['invalid_locations'].append(ref_id)

            # --- Footnote content -------------------------------------------
            if 'word/footnotes.xml' in part_names:
                footnotes_root = etree.fromstring(zf.read('word/footnotes.xml'))
                content_ids = set()
                for fn in footnotes_root.xpath('//w:footnote', namespaces=nsmap):
                    fn_id = fn.get(id_attr)
                    if not fn_id:
                        continue
                    # The same ID on more than one footnote is a conflict;
                    # record each conflicting ID once.
                    if fn_id in content_ids and fn_id not in report['id_conflicts']:
                        report['id_conflicts'].append(fn_id)
                    content_ids.add(fn_id)
                    if fn_id not in separator_ids:
                        report['total_content'] += 1

                # Sorted so the report is stable from run to run.
                report['orphaned_content'] = sorted(
                    content_ids - ref_ids - separator_ids
                )
                report['missing_references'] = sorted(ref_ids - content_ids)
            elif report['total_references'] > 0:
                report['coherence_issues'].append('References exist but no footnotes.xml')

            # --- Package relationships --------------------------------------
            if 'word/_rels/document.xml.rels' in part_names:
                rels_root = etree.fromstring(zf.read('word/_rels/document.xml.rels'))
                fn_rels = rels_root.xpath(
                    "//r:Relationship[contains(@Type, 'footnotes')]",
                    namespaces={'r': REL_NS}
                )
                if report['total_content'] > 0 and not fn_rels:
                    report['coherence_issues'].append('Missing footnotes relationship')
                elif len(fn_rels) > 1:
                    report['coherence_issues'].append(
                        f'Multiple footnote relationships: {len(fn_rels)}'
                    )

            # --- Content types ----------------------------------------------
            if '[Content_Types].xml' in part_names:
                ct_root = etree.fromstring(zf.read('[Content_Types].xml'))
                fn_overrides = ct_root.xpath(
                    "//ct:Override[@PartName='/word/footnotes.xml']",
                    namespaces={'ct': CT_NS}
                )
                if report['total_content'] > 0 and not fn_overrides:
                    report['coherence_issues'].append('Missing footnotes content type')
                elif len(fn_overrides) > 1:
                    report['coherence_issues'].append(
                        f'Multiple footnote content types: {len(fn_overrides)}'
                    )

            # --- Styles -----------------------------------------------------
            if 'word/styles.xml' in part_names:
                styles_root = etree.fromstring(zf.read('word/styles.xml'))
                for style_id in ('FootnoteReference', 'FootnoteText'):
                    found = styles_root.xpath(
                        f'//w:style[@w:styleId="{style_id}"]', namespaces=nsmap
                    )
                    if not found:
                        report['missing_styles'].append(style_id)

            # Missing styles are reported but do not make the document invalid.
            blocking_keys = (
                'id_conflicts',
                'orphaned_content',
                'missing_references',
                'invalid_locations',
                'coherence_issues',
            )
            is_valid = not any(report[key] for key in blocking_keys)

            if is_valid:
                message = "Document footnotes are valid"
            else:
                message = "Document has footnote issues"

            return is_valid, message, report

    except Exception as e:
        # Boundary handler: callers expect a result tuple, never an exception.
        return False, f"Error validating document: {str(e)}", report
# MCP tool wrapper that calls the core validate_document_footnotes function and
# formats the response as a dictionary for the MCP server.
async def validate_footnotes_tool(filename: str) -> Dict[str, Any]:
    """Run footnote validation on a document and package the outcome.

    The filename is first passed through ``ensure_docx_extension``. If the
    resulting path does not exist, a failure response with an empty report is
    produced; otherwise the result of ``validate_document_footnotes`` is used.

    Args:
        filename: Path to the Word document

    Returns:
        Dict with the keys ``valid``, ``message`` and ``report``.
    """
    filename = ensure_docx_extension(filename)

    if os.path.exists(filename):
        # Delegate to the core validator, which returns a 3-tuple.
        outcome = validate_document_footnotes(filename)
    else:
        outcome = (False, f"Document {filename} does not exist", {})

    return dict(zip(("valid", "message", "report"), outcome))
# word_document_server/main.py:358-362 (registration)
# MCP tool registration using FastMCP @mcp.tool() decorator, defining the tool
# entry point that delegates to the footnote_tools wrapper.
@mcp.tool()
def validate_document_footnotes(filename: str):
    """Validate all footnotes in document for coherence and compliance. Returns detailed report on ID conflicts, orphaned content, missing styles, etc."""
    # NOTE(review): the docstring above is kept verbatim because the decorator
    # may publish it as the tool description - confirm before rewording.
    # Hand the call straight to the wrapper and pass its result back unchanged.
    outcome = footnote_tools.validate_footnotes_tool(filename)
    return outcome