# Copyright (c) 2024 Rajesh
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""Main MCP server for docx operations."""
from pathlib import Path
from typing import Any, Optional
from docx import Document
from docx.shared import Inches
from fastmcp import FastMCP
from .config import config
from .exceptions import (
DocumentError,
DocxMcpError,
InvalidParameterError,
)
from .exceptions import (
FileNotFoundError as DocxFileNotFoundError,
)
from .logging_config import get_logger, setup_logging
from .utils import (
extract_all_text,
extract_equations,
get_document_info,
get_safe_file_info,
normalize_path,
safe_open_document,
safe_save_document,
validate_docx_file,
)
# Initialize logging at import time (setup_logging comes from .logging_config)
setup_logging()
# Module-level logger, keyed by this module's name; used by every tool below
logger = get_logger(__name__)
# Create FastMCP server; the @app.tool() decorators in this module register on it
app = FastMCP(name="docx-mcp")
# =============================================================================
# Phase 1: Core Document Operations
# =============================================================================
@app.tool()
def create_docx(filepath: str, title: Optional[str] = None) -> dict[str, Any]:
    """
    Create an empty Word document on disk.

    Missing parent directories are created first. A non-empty ``title`` is
    written to the document's core properties before the file is saved.

    Args:
        filepath: Destination path of the new document
        title: Optional document title; ignored when None or empty

    Returns:
        Dictionary with "status"; on success also the normalized "filepath"
        and a "message", on failure an "error" (plus "error_code" when the
        failure is a DocxMcpError)
    """
    logger.info("Creating new document", extra={"tool": "create_docx", "filepath": filepath})
    try:
        target = normalize_path(filepath)
        target.parent.mkdir(parents=True, exist_ok=True)

        document = Document()
        if title:
            document.core_properties.title = title
        document.save(str(target))

        logger.info("Document created successfully", extra={"filepath": filepath})
        response = {
            "status": "success",
            "filepath": str(target),
            "message": f"Document created: {target.name}",
        }
        return response
    except DocxMcpError as exc:
        # Domain errors carry their own message and error code
        logger.warning(exc.message, extra={"tool": "create_docx", "error_code": exc.error_code})
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        # Tool boundary: report anything else as an error payload
        logger.error(f"Unexpected error creating document: {exc!s}")
        return {"status": "error", "error": str(exc)}
@app.tool()
def read_docx(filepath: str) -> dict[str, Any]:
    """
    Return the text and metadata of a Word document.

    The text comes from ``extract_all_text`` and the metadata from
    ``get_document_info``; both receive ``filepath`` unchanged.

    Args:
        filepath: Path of the document to read

    Returns:
        Dictionary with "status"; on success also "filepath", "content" and
        "info", on failure an "error" (plus "error_code" for DocxMcpError)
    """
    logger.info("Reading document", extra={"tool": "read_docx", "filepath": filepath})
    try:
        body_text = extract_all_text(filepath)
        metadata = get_document_info(filepath)
        return dict(status="success", filepath=filepath, content=body_text, info=metadata)
    except DocxMcpError as exc:
        logger.warning(exc.message, extra={"tool": "read_docx", "error_code": exc.error_code})
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        logger.error(f"Unexpected error reading document: {exc!s}")
        return {"status": "error", "error": str(exc)}
@app.tool()
def write_docx(filepath: str, content: str) -> dict[str, Any]:
    """
    Create or overwrite a document with plain text content.

    Each non-blank line of ``content`` becomes one paragraph; blank and
    whitespace-only lines are dropped. Parent directories are created when
    missing.

    Args:
        filepath: Path to the document
        content: Text content to write; any common line-ending convention
            ("\\n", "\\r\\n", "\\r") is accepted

    Returns:
        Dictionary with "status"; on success also the normalized "filepath"
        and a "message", on failure an "error" (plus "error_code" for
        DocxMcpError)
    """
    logger.info("Writing to document", extra={"tool": "write_docx", "filepath": filepath})
    try:
        path = normalize_path(filepath)
        path.parent.mkdir(parents=True, exist_ok=True)
        doc = Document()
        # splitlines() rather than split("\n"): with Windows line endings a
        # plain "\n" split would leave a stray "\r" at the end of every
        # paragraph's text.
        for paragraph_text in content.splitlines():
            if paragraph_text.strip():
                doc.add_paragraph(paragraph_text)
        doc.save(str(path))
        logger.info("Document written successfully", extra={"filepath": filepath})
        return {
            "status": "success",
            "filepath": str(path),
            "message": f"Document written: {path.name}",
        }
    except DocxMcpError as e:
        logger.warning(e.message, extra={"tool": "write_docx", "error_code": e.error_code})
        return {"status": "error", "error": e.message, "error_code": e.error_code}
    except Exception as e:
        # Tool boundary: report anything else as an error payload
        logger.error(f"Unexpected error writing document: {str(e)}")
        return {"status": "error", "error": str(e)}
@app.tool()
def append_docx(filepath: str, content: str) -> dict[str, Any]:
    """
    Append text content to an existing document.

    Each non-blank line of ``content`` is added as a new paragraph at the end
    of the document; blank and whitespace-only lines are dropped. The
    document is opened and saved through the project's safe_* helpers.

    Args:
        filepath: Path to the document
        content: Text content to append; any common line-ending convention
            ("\\n", "\\r\\n", "\\r") is accepted

    Returns:
        Dictionary with "status"; on success also "filepath" and a
        "message", on failure an "error" (plus "error_code" for
        DocxMcpError)
    """
    logger.info("Appending to document", extra={"tool": "append_docx", "filepath": filepath})
    try:
        doc = safe_open_document(filepath)
        # splitlines() rather than split("\n"): with Windows line endings a
        # plain "\n" split would leave a stray "\r" at the end of every
        # paragraph's text.
        for paragraph_text in content.splitlines():
            if paragraph_text.strip():
                doc.add_paragraph(paragraph_text)
        safe_save_document(doc, filepath)
        logger.info("Content appended successfully", extra={"filepath": filepath})
        return {
            "status": "success",
            "filepath": filepath,
            "message": "Content appended successfully",
        }
    except DocxMcpError as e:
        logger.warning(e.message, extra={"tool": "append_docx", "error_code": e.error_code})
        return {"status": "error", "error": e.message, "error_code": e.error_code}
    except Exception as e:
        # Tool boundary: report anything else as an error payload
        logger.error(f"Unexpected error appending to document: {str(e)}")
        return {"status": "error", "error": str(e)}
@app.tool()
def list_docx(directory: Optional[str] = None) -> dict[str, Any]:
    """
    List all Word documents in a directory tree.

    The directory is searched recursively for files ending in .docx, .doc,
    .dotx or .dot. Files whose info cannot be collected are left out of the
    result and reported in the debug log.

    Args:
        directory: Directory path to list (defaults to config.project_dir)

    Returns:
        Dictionary with "status"; on success also "directory", "count" and
        "documents" (one get_safe_file_info result per file), on failure an
        "error" (plus "error_code" for DocxMcpError)
    """
    logger.info("Listing documents", extra={"tool": "list_docx", "directory": directory})
    try:
        if directory is None:
            dir_path = config.project_dir
        else:
            dir_path = normalize_path(directory)
        if not dir_path.is_dir():
            raise InvalidParameterError("directory", "Path is not a directory")
        documents = []
        for ext in (".docx", ".doc", ".dotx", ".dot"):
            for file in dir_path.glob(f"**/*{ext}"):
                try:
                    documents.append(get_safe_file_info(str(file)))
                except Exception as skip_error:
                    # Best effort: one unreadable file must not fail the whole
                    # listing, but leave a trace so the omission is diagnosable.
                    logger.debug(
                        f"Skipping inaccessible file {file}: {skip_error!s}",
                        extra={"tool": "list_docx"},
                    )
                    continue
        logger.info(f"Found {len(documents)} documents", extra={"tool": "list_docx"})
        return {
            "status": "success",
            "directory": str(dir_path),
            "count": len(documents),
            "documents": documents,
        }
    except DocxMcpError as e:
        logger.warning(e.message, extra={"tool": "list_docx", "error_code": e.error_code})
        return {"status": "error", "error": e.message, "error_code": e.error_code}
    except Exception as e:
        # Tool boundary: report anything else as an error payload
        logger.error(f"Unexpected error listing documents: {str(e)}")
        return {"status": "error", "error": str(e)}
@app.tool()
def delete_docx(filepath: str, confirm: bool = False) -> dict[str, Any]:
    """
    Remove a Word document from disk.

    Nothing is deleted unless ``confirm`` is truthy; without it the call
    only returns a warning payload. The path is checked with
    ``validate_docx_file`` before it is unlinked.

    Args:
        filepath: Path of the document to delete
        confirm: Safety switch; must be True for the deletion to happen

    Returns:
        Dictionary with "status" ("success", "warning" or "error") and a
        "message" or "error" (plus "error_code" for DocxMcpError)
    """
    logger.info("Deleting document", extra={"tool": "delete_docx", "filepath": filepath})
    try:
        if not confirm:
            # Refuse to act until the caller opts in explicitly
            return {"status": "warning", "message": "Deletion requires confirm=True"}

        victim = validate_docx_file(filepath)
        victim.unlink()

        logger.info("Document deleted successfully", extra={"filepath": filepath})
        return {"status": "success", "message": f"Document deleted: {victim.name}"}
    except DocxMcpError as exc:
        logger.warning(exc.message, extra={"tool": "delete_docx", "error_code": exc.error_code})
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        logger.error(f"Unexpected error deleting document: {exc!s}")
        return {"status": "error", "error": str(exc)}
@app.tool()
def copy_docx(source_filepath: str, destination_filepath: str) -> dict[str, Any]:
    """
    Copy a Word document to another path.

    The source is validated with ``validate_docx_file``, opened with
    python-docx and saved again under the destination path (this is a
    load-and-save round trip, not a byte-level file copy). Missing parent
    directories of the destination are created.

    Args:
        source_filepath: Path of the document to copy
        destination_filepath: Path the copy is written to

    Returns:
        Dictionary with "status"; on success also "source", "destination"
        and a "message", on failure an "error" (plus "error_code" for
        DocxMcpError)
    """
    log_context = {
        "tool": "copy_docx",
        "source": source_filepath,
        "destination": destination_filepath,
    }
    logger.info("Copying document", extra=log_context)
    try:
        origin = validate_docx_file(source_filepath)
        target = normalize_path(destination_filepath)
        target.parent.mkdir(parents=True, exist_ok=True)

        # Load the source and write it back out under the new name
        Document(str(origin)).save(str(target))

        logger.info("Document copied successfully", extra={"destination": destination_filepath})
        return {
            "status": "success",
            "source": str(origin),
            "destination": str(target),
            "message": "Document copied successfully",
        }
    except DocxMcpError as exc:
        logger.warning(exc.message, extra={"tool": "copy_docx", "error_code": exc.error_code})
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        logger.error(f"Unexpected error copying document: {exc!s}")
        return {"status": "error", "error": str(exc)}
# =============================================================================
# Health Check (the health_check tool is defined near the end of this module)
# =============================================================================
# =============================================================================
# Phase 2: Word Native Template System
# =============================================================================
@app.tool()
def list_merge_fields(filepath: str) -> dict[str, Any]:
    """
    Extract all MERGEFIELD names from a document or template.

    Only field instructions (``w:instrText``) inside runs of the document's
    body paragraphs (``doc.paragraphs``) are inspected. The field name is the
    token that follows the MERGEFIELD keyword; names written in double quotes
    (e.g. ``MERGEFIELD "First Name"``) are returned without the quotes.

    Args:
        filepath: Path to the document

    Returns:
        Dictionary with "status"; on success also "filepath", the sorted,
        de-duplicated "merge_fields" list and its "count", on failure an
        "error" (plus "error_code" for DocxMcpError)
    """
    import re

    logger.info("Listing merge fields", extra={"tool": "list_merge_fields", "filepath": filepath})
    try:
        doc = safe_open_document(filepath)
        # The name follows the keyword wherever the keyword sits in the
        # instruction, and is either a quoted string or a single bare token.
        field_pattern = re.compile(r'MERGEFIELD\s+(?:"([^"]+)"|(\S+))')
        merge_fields = set()
        for paragraph in doc.paragraphs:
            for run in paragraph.runs:
                for instr in run.element.xpath(".//w:instrText"):
                    match = field_pattern.search(instr.text or "")
                    if match:
                        merge_fields.add(match.group(1) or match.group(2))
        return {
            "status": "success",
            "filepath": filepath,
            "merge_fields": sorted(merge_fields),
            "count": len(merge_fields),
        }
    except DocxMcpError as e:
        logger.warning(e.message, extra={"tool": "list_merge_fields", "error_code": e.error_code})
        return {"status": "error", "error": e.message, "error_code": e.error_code}
    except Exception as e:
        # Tool boundary: report anything else as an error payload
        logger.error(f"Unexpected error listing merge fields: {str(e)}")
        return {"status": "error", "error": str(e)}
@app.tool()
def fill_merge_fields(filepath: str, data: dict[str, str]) -> dict[str, Any]:
    """
    Replace ``{{name}}`` text placeholders in a document with values.

    Every run of every body paragraph is scanned; when a run's text contains
    ``{{field_name}}`` for a key of ``data``, all occurrences in that run are
    replaced with the stringified value. The document is then saved back to
    ``filepath``. A placeholder is only found when it sits entirely inside a
    single run.

    Args:
        filepath: Path to the document or template
        data: Mapping of placeholder names to replacement values

    Returns:
        Dictionary with "status"; on success also "filepath",
        "fields_filled" (number of run/placeholder matches) and a "message",
        on failure an "error" (plus "error_code" for DocxMcpError)
    """
    logger.info("Filling merge fields", extra={"tool": "fill_merge_fields", "filepath": filepath})
    try:
        document = safe_open_document(filepath)

        # Build each placeholder token once instead of once per run
        tokens = [(f"{{{{{field_name}}}}}", field_value) for field_name, field_value in data.items()]

        fields_filled = 0
        for para in document.paragraphs:
            for piece in para.runs:
                for token, replacement in tokens:
                    if token not in piece.text:
                        continue
                    piece.text = piece.text.replace(token, str(replacement))
                    fields_filled += 1

        safe_save_document(document, filepath)
        logger.info(f"Filled {fields_filled} merge fields", extra={"filepath": filepath})
        return {
            "status": "success",
            "filepath": filepath,
            "fields_filled": fields_filled,
            "message": f"Filled {fields_filled} merge fields",
        }
    except DocxMcpError as exc:
        logger.warning(
            exc.message,
            extra={"tool": "fill_merge_fields", "error_code": exc.error_code},
        )
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        logger.error(f"Unexpected error filling merge fields: {exc!s}")
        return {"status": "error", "error": str(exc)}
@app.tool()
def list_content_controls(filepath: str) -> dict[str, Any]:
    """
    List all content controls in a document body.

    Every ``w:sdt`` element below the body is reported, in document order,
    including controls that are direct children of the body and controls
    nested in other elements. A control without a ``w:tag`` or ``w:alias``
    is still listed, with an empty string for the missing value. Controls
    without a ``w:sdtPr`` properties element are skipped.

    Args:
        filepath: Path to the document

    Returns:
        Dictionary with "status"; on success also "filepath",
        "content_controls" (list of {"tag", "title"}) and "count", on failure
        an "error" (plus "error_code" for DocxMcpError)
    """
    logger.info(
        "Listing content controls",
        extra={"tool": "list_content_controls", "filepath": filepath},
    )
    try:
        doc = safe_open_document(filepath)
        # Fully qualified (Clark notation) names, so the lookups below need
        # no namespace prefix map.
        w_ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
        val_attr = f"{w_ns}val"
        controls = []
        # iter() walks every descendant of the body, so controls that are
        # direct body children are found as well as nested ones.
        for sdt in doc.element.body.iter(f"{w_ns}sdt"):
            props = sdt.find(f"{w_ns}sdtPr")
            if props is None:
                continue
            # Tag and alias are both optional on a content control
            tag_element = props.find(f"{w_ns}tag")
            alias_element = props.find(f"{w_ns}alias")
            controls.append({
                "tag": tag_element.get(val_attr, "") if tag_element is not None else "",
                "title": alias_element.get(val_attr, "") if alias_element is not None else "",
            })
        return {
            "status": "success",
            "filepath": filepath,
            "content_controls": controls,
            "count": len(controls),
        }
    except DocxMcpError as e:
        logger.warning(
            e.message,
            extra={"tool": "list_content_controls", "error_code": e.error_code},
        )
        return {"status": "error", "error": e.message, "error_code": e.error_code}
    except Exception as e:
        # Tool boundary: report anything else as an error payload
        logger.error(f"Unexpected error listing content controls: {str(e)}")
        return {"status": "error", "error": str(e)}
@app.tool()
def get_document_properties(filepath: str) -> dict[str, Any]:
    """
    Read the core properties (metadata) of a document.

    Text properties that are unset are reported as empty strings; the
    "created" and "modified" timestamps are stringified, or None when unset.

    Args:
        filepath: Path to the document

    Returns:
        Dictionary with "status"; on success also "filepath" and a
        "properties" mapping, on failure an "error" (plus "error_code" for
        DocxMcpError)
    """
    logger.info(
        "Getting document properties",
        extra={"tool": "get_document_properties", "filepath": filepath},
    )
    try:
        core = safe_open_document(filepath).core_properties

        # Text-valued properties: fall back to "" when unset
        text_fields = ("title", "subject", "author", "keywords", "comments", "category")
        properties = {field: getattr(core, field) or "" for field in text_fields}

        # Timestamps: stringify when present, otherwise None
        created = core.created
        properties["created"] = str(created) if created else None
        modified = core.modified
        properties["modified"] = str(modified) if modified else None

        return {"status": "success", "filepath": filepath, "properties": properties}
    except DocxMcpError as exc:
        logger.warning(
            exc.message,
            extra={"tool": "get_document_properties", "error_code": exc.error_code},
        )
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        logger.error(f"Unexpected error getting document properties: {exc!s}")
        return {"status": "error", "error": str(exc)}
@app.tool()
def set_document_properties(
    filepath: str,
    title: Optional[str] = None,
    subject: Optional[str] = None,
    author: Optional[str] = None,
    keywords: Optional[str] = None,
    comments: Optional[str] = None,
) -> dict[str, Any]:
    """
    Update core properties (metadata) of a document.

    Only arguments that are not None are written; an empty string is a real
    value and overwrites the property. The document is saved afterwards even
    when no property was supplied.

    Args:
        filepath: Path to the document
        title: New document title
        subject: New document subject
        author: New document author
        keywords: New document keywords
        comments: New document comments

    Returns:
        Dictionary with "status"; on success also "filepath" and a
        "message", on failure an "error" (plus "error_code" for
        DocxMcpError)
    """
    logger.info(
        "Setting document properties",
        extra={"tool": "set_document_properties", "filepath": filepath},
    )
    try:
        document = safe_open_document(filepath)
        core = document.core_properties

        requested = (
            ("title", title),
            ("subject", subject),
            ("author", author),
            ("keywords", keywords),
            ("comments", comments),
        )
        for attribute, new_value in requested:
            if new_value is not None:
                setattr(core, attribute, new_value)

        safe_save_document(document, filepath)
        logger.info("Document properties updated", extra={"filepath": filepath})
        return {
            "status": "success",
            "filepath": filepath,
            "message": "Document properties updated",
        }
    except DocxMcpError as exc:
        logger.warning(
            exc.message,
            extra={"tool": "set_document_properties", "error_code": exc.error_code},
        )
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        logger.error(f"Unexpected error setting document properties: {exc!s}")
        return {"status": "error", "error": str(exc)}
# =============================================================================
# Phase 3: Style Management
# =============================================================================
@app.tool()
def list_styles(filepath: str) -> dict[str, Any]:
    """
    List the paragraph and character styles defined in a document.

    Styles whose type is neither 1 (paragraph) nor 2 (character) are not
    listed, but they are still included in "total_styles".

    Args:
        filepath: Path to the document

    Returns:
        Dictionary with "status"; on success also "filepath", "styles"
        (with "paragraph_styles" and "character_styles" lists of
        {"name", "type"}) and "total_styles", on failure an "error" (plus
        "error_code" for DocxMcpError)
    """
    logger.info("Listing styles", extra={"tool": "list_styles", "filepath": filepath})
    try:
        document = safe_open_document(filepath)
        paragraph_styles = []
        character_styles = []

        for style in document.styles:
            kind = style.type
            entry = {"name": style.name, "type": str(kind)}
            if kind == 1:  # Paragraph style
                paragraph_styles.append(entry)
            elif kind == 2:  # Character style
                character_styles.append(entry)

        return {
            "status": "success",
            "filepath": filepath,
            "styles": {
                "paragraph_styles": paragraph_styles,
                "character_styles": character_styles,
            },
            "total_styles": len(list(document.styles)),
        }
    except DocxMcpError as exc:
        logger.warning(exc.message, extra={"tool": "list_styles", "error_code": exc.error_code})
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        logger.error(f"Unexpected error listing styles: {exc!s}")
        return {"status": "error", "error": str(exc)}
@app.tool()
def apply_paragraph_style(filepath: str, paragraph_index: int, style_name: str) -> dict[str, Any]:
    """
    Assign a named style to one paragraph and save the document.

    Args:
        filepath: Path to the document
        paragraph_index: 0-based position of the paragraph in the body
        style_name: Name of the style to apply (e.g., "Heading 1", "Normal")

    Returns:
        Dictionary with "status"; on success also "filepath",
        "paragraph_index" and "style_applied", on failure an "error" (plus
        "error_code" for DocxMcpError, e.g. an out-of-range index)
    """
    logger.info(
        "Applying paragraph style",
        extra={"tool": "apply_paragraph_style", "filepath": filepath},
    )
    try:
        document = safe_open_document(filepath)
        body_paragraphs = document.paragraphs

        # Negative indices are rejected rather than counted from the end
        if not 0 <= paragraph_index < len(body_paragraphs):
            raise InvalidParameterError("paragraph_index", "Index out of range")

        body_paragraphs[paragraph_index].style = style_name
        safe_save_document(document, filepath)

        logger.info(f"Applied style {style_name} to paragraph", extra={"filepath": filepath})
        return {
            "status": "success",
            "filepath": filepath,
            "paragraph_index": paragraph_index,
            "style_applied": style_name,
        }
    except DocxMcpError as exc:
        logger.warning(
            exc.message,
            extra={"tool": "apply_paragraph_style", "error_code": exc.error_code},
        )
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        logger.error(f"Unexpected error applying paragraph style: {exc!s}")
        return {"status": "error", "error": str(exc)}
# =============================================================================
# Phase 4: Lists - Bullets and Numbering
# =============================================================================
@app.tool()
def apply_bullet_list(
    filepath: str,
    paragraph_indices: list[int],
    bullet_style: str = "bullet",
) -> dict[str, Any]:
    """
    Apply bullet list formatting to paragraphs.

    Each selected paragraph gets a 0.5 inch left indent and the "List Bullet"
    style. All indices are validated before any paragraph is changed, so an
    invalid index leaves the document untouched.

    Args:
        filepath: Path to the document
        paragraph_indices: List of 0-based paragraph indices to bullet
        bullet_style: Type of bullet ('bullet', 'circle', 'square', 'dash',
            'check'). NOTE: currently only echoed back in the result; the
            "List Bullet" style is applied regardless of this value.

    Returns:
        Dictionary with "status"; on success also "filepath",
        "paragraphs_updated" and "bullet_style", on failure an "error" (plus
        "error_code" for DocxMcpError)
    """
    logger.info("Applying bullet list", extra={"tool": "apply_bullet_list", "filepath": filepath})
    try:
        doc = safe_open_document(filepath)
        # Fetch the paragraph list once instead of once per index
        paragraphs = doc.paragraphs
        paragraph_count = len(paragraphs)
        # Validate indices
        for idx in paragraph_indices:
            if idx < 0 or idx >= paragraph_count:
                raise InvalidParameterError("paragraph_indices", f"Index {idx} out of range")
        # Apply bullet formatting using the "List Bullet" paragraph style
        for idx in paragraph_indices:
            paragraph = paragraphs[idx]
            paragraph.paragraph_format.left_indent = Inches(0.5)
            paragraph.style = "List Bullet"
        safe_save_document(doc, filepath)
        logger.info(
            f"Applied bullet list to {len(paragraph_indices)} paragraphs",
            extra={"filepath": filepath},
        )
        return {
            "status": "success",
            "filepath": filepath,
            "paragraphs_updated": len(paragraph_indices),
            "bullet_style": bullet_style,
        }
    except DocxMcpError as e:
        logger.warning(e.message, extra={"tool": "apply_bullet_list", "error_code": e.error_code})
        return {"status": "error", "error": e.message, "error_code": e.error_code}
    except Exception as e:
        # Tool boundary: report anything else as an error payload
        logger.error(f"Unexpected error applying bullet list: {str(e)}")
        return {"status": "error", "error": str(e)}
@app.tool()
def apply_numbered_list(
    filepath: str,
    paragraph_indices: list[int],
    number_format: str = "1",
) -> dict[str, Any]:
    """
    Apply numbered list formatting to paragraphs.

    Each selected paragraph gets a 0.5 inch left indent and the "List Number"
    style. All indices are validated before any paragraph is changed, so an
    invalid index leaves the document untouched.

    Args:
        filepath: Path to the document
        paragraph_indices: List of 0-based paragraph indices to number
        number_format: Format for numbering ('1', 'a', 'i', 'I', 'A').
            NOTE: currently only echoed back in the result; the
            "List Number" style is applied regardless of this value.

    Returns:
        Dictionary with "status"; on success also "filepath",
        "paragraphs_updated" and "number_format", on failure an "error"
        (plus "error_code" for DocxMcpError)
    """
    logger.info(
        "Applying numbered list",
        extra={"tool": "apply_numbered_list", "filepath": filepath},
    )
    try:
        doc = safe_open_document(filepath)
        # Fetch the paragraph list once instead of once per index
        paragraphs = doc.paragraphs
        paragraph_count = len(paragraphs)
        # Validate indices
        for idx in paragraph_indices:
            if idx < 0 or idx >= paragraph_count:
                raise InvalidParameterError("paragraph_indices", f"Index {idx} out of range")
        # Apply numbered formatting using the "List Number" paragraph style
        for idx in paragraph_indices:
            paragraph = paragraphs[idx]
            paragraph.paragraph_format.left_indent = Inches(0.5)
            paragraph.style = "List Number"
        safe_save_document(doc, filepath)
        logger.info(
            f"Applied numbered list to {len(paragraph_indices)} paragraphs",
            extra={"filepath": filepath},
        )
        return {
            "status": "success",
            "filepath": filepath,
            "paragraphs_updated": len(paragraph_indices),
            "number_format": number_format,
        }
    except DocxMcpError as e:
        logger.warning(e.message, extra={"tool": "apply_numbered_list", "error_code": e.error_code})
        return {"status": "error", "error": e.message, "error_code": e.error_code}
    except Exception as e:
        # Tool boundary: report anything else as an error payload
        logger.error(f"Unexpected error applying numbered list: {str(e)}")
        return {"status": "error", "error": str(e)}
@app.tool()
def set_list_level(filepath: str, paragraph_index: int, level: int) -> dict[str, Any]:
    """
    Set the indentation of a paragraph from a list level.

    The level is translated into a left indent of 0.5 inch per level; no
    other paragraph attribute is changed. The level is checked before the
    document is opened.

    Args:
        filepath: Path to the document
        paragraph_index: 0-based index of the paragraph
        level: Indentation level, 0 through 8 inclusive

    Returns:
        Dictionary with "status"; on success also "filepath",
        "paragraph_index" and "level", on failure an "error" (plus
        "error_code" for DocxMcpError)
    """
    logger.info("Setting list level", extra={"tool": "set_list_level", "filepath": filepath})
    try:
        if not 0 <= level <= 8:
            raise InvalidParameterError("level", "Level must be between 0 and 8")

        document = safe_open_document(filepath)
        if not 0 <= paragraph_index < len(document.paragraphs):
            raise InvalidParameterError("paragraph_index", "Index out of range")

        target = document.paragraphs[paragraph_index]
        target.paragraph_format.left_indent = Inches(0.5 * level)
        safe_save_document(document, filepath)

        logger.info(f"Set list level to {level} for paragraph", extra={"filepath": filepath})
        return {
            "status": "success",
            "filepath": filepath,
            "paragraph_index": paragraph_index,
            "level": level,
        }
    except DocxMcpError as exc:
        logger.warning(exc.message, extra={"tool": "set_list_level", "error_code": exc.error_code})
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        logger.error(f"Unexpected error setting list level: {exc!s}")
        return {"status": "error", "error": str(exc)}
# =============================================================================
# Phase 5: Images and Captions
# =============================================================================
@app.tool()
def insert_image(
    filepath: str,
    image_path: str,
    width: Optional[float] = None,
    height: Optional[float] = None,
) -> dict[str, Any]:
    """
    Append an image to the end of a document.

    The picture is placed in a new paragraph added after the existing
    content. Width and height are applied only when given and non-zero;
    a dimension that is left out is not passed to python-docx.

    Args:
        filepath: Path to the document
        image_path: Path of the image file to insert
        width: Image width in inches (optional)
        height: Image height in inches (optional)

    Returns:
        Dictionary with "status"; on success also "filepath", "image" and a
        "message", on failure an "error" (plus "error_code" for
        DocxMcpError, e.g. when the image file does not exist)
    """
    logger.info("Inserting image", extra={"tool": "insert_image", "filepath": filepath})
    try:
        # Paths with a Word extension go through the docx validator; any
        # other path only has to exist.
        if image_path.endswith(('.docx', '.doc', '.dotx', '.dot')):
            image_file = validate_docx_file(image_path)
        else:
            image_file = Path(image_path)
            if not image_file.exists():
                raise DocxFileNotFoundError(image_path)

        document = safe_open_document(filepath)
        picture_run = document.add_paragraph().add_run()

        # Pass only the dimensions the caller actually supplied
        size_options = {}
        if width:
            size_options["width"] = Inches(width)
        if height:
            size_options["height"] = Inches(height)
        picture_run.add_picture(str(image_file), **size_options)

        safe_save_document(document, filepath)
        logger.info("Image inserted successfully", extra={"filepath": filepath})
        return {
            "status": "success",
            "filepath": filepath,
            "image": image_path,
            "message": "Image inserted successfully",
        }
    except DocxMcpError as exc:
        logger.warning(exc.message, extra={"tool": "insert_image", "error_code": exc.error_code})
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        logger.error(f"Unexpected error inserting image: {exc!s}")
        return {"status": "error", "error": str(exc)}
@app.tool()
def add_image_caption(
    filepath: str,
    image_index: int,
    caption_text: str,
    caption_type: str = "Figure",
) -> dict[str, Any]:
    """
    Append a caption paragraph to the end of a document.

    The caption is an italic run reading "<caption_type>: <caption_text>" in
    a new paragraph styled "Caption". It is always added after the last
    paragraph; ``image_index`` is accepted but not used to locate an image.

    Args:
        filepath: Path to the document
        image_index: Index of the image (0-based); currently unused
        caption_text: Caption text
        caption_type: Caption label (Figure, Table, Equation)

    Returns:
        Dictionary with "status"; on success also "filepath", "caption" and
        a "message", on failure an "error" (plus "error_code" for
        DocxMcpError, e.g. when the document has no paragraphs)
    """
    logger.info("Adding image caption", extra={"tool": "add_image_caption", "filepath": filepath})
    try:
        document = safe_open_document(filepath)

        # A document without any paragraph has nothing to caption
        if not len(document.paragraphs) > 0:
            raise DocumentError("Document has no paragraphs", filepath)

        caption = document.add_paragraph()
        label = caption.add_run(f"{caption_type}: {caption_text}")
        label.italic = True
        caption.style = "Caption"

        safe_save_document(document, filepath)
        logger.info("Caption added successfully", extra={"filepath": filepath})
        return {
            "status": "success",
            "filepath": filepath,
            "caption": caption_text,
            "message": "Caption added successfully",
        }
    except DocxMcpError as exc:
        logger.warning(
            exc.message,
            extra={"tool": "add_image_caption", "error_code": exc.error_code},
        )
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        logger.error(f"Unexpected error adding image caption: {exc!s}")
        return {"status": "error", "error": str(exc)}
@app.tool()
def list_images(filepath: str) -> dict[str, Any]:
    """
    List all images embedded in a document.

    Images are found through the relationships of the main document part.
    Only relationships of the image type whose target is stored inside the
    package are reported; external targets (for example hyperlinks whose URL
    merely contains the word "image") are ignored.

    Args:
        filepath: Path to the document

    Returns:
        Dictionary with "status"; on success also "filepath", "images"
        (list of {"filename", "content_type"}) and "count", on failure an
        "error" (plus "error_code" for DocxMcpError)
    """
    logger.info("Listing images", extra={"tool": "list_images", "filepath": filepath})
    try:
        doc = safe_open_document(filepath)
        image_reltype = (
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
        )
        images = []
        for rel in doc.part.rels.values():
            # External relationships have no target part to inspect, so
            # they are filtered out before target_part is touched.
            if rel.is_external or rel.reltype != image_reltype:
                continue
            image_part = rel.target_part
            images.append({
                # Name taken from the package part name, the same way
                # extract_images derives it, so both tools agree.
                "filename": Path(image_part.partname).name,
                "content_type": image_part.content_type,
            })
        return {
            "status": "success",
            "filepath": filepath,
            "images": images,
            "count": len(images),
        }
    except DocxMcpError as e:
        logger.warning(e.message, extra={"tool": "list_images", "error_code": e.error_code})
        return {"status": "error", "error": e.message, "error_code": e.error_code}
    except Exception as e:
        # Tool boundary: report anything else as an error payload
        logger.error(f"Unexpected error listing images: {str(e)}")
        return {"status": "error", "error": str(e)}
@app.tool()
def list_equations(filepath: str) -> dict[str, Any]:
    """
    List the equations found in a Word document.

    The equations come from ``extract_equations``; for each one only the
    "index", "paragraph_index", "type", "latex" and "context" entries are
    returned (any other entries, such as raw markup, are left out).

    Args:
        filepath: Path to the document

    Returns:
        Dictionary with "status"; on success also "filepath", "equations"
        and "count", on failure an "error" (plus "error_code" for
        DocxMcpError)
    """
    logger.info("Listing equations", extra={"tool": "list_equations", "filepath": filepath})
    try:
        found = extract_equations(filepath)
        exposed_keys = ("index", "paragraph_index", "type", "latex", "context")
        summaries = [{key: item[key] for key in exposed_keys} for item in found]
        return {
            "status": "success",
            "filepath": filepath,
            "equations": summaries,
            "count": len(found),
        }
    except DocxMcpError as exc:
        logger.warning(exc.message, extra={"tool": "list_equations", "error_code": exc.error_code})
        return {"status": "error", "error": exc.message, "error_code": exc.error_code}
    except Exception as exc:
        logger.error(f"Unexpected error listing equations: {exc!s}")
        return {"status": "error", "error": str(exc)}
@app.tool()
def get_equation(filepath: str, equation_index: int, include_omml: bool = False) -> dict[str, Any]:
    """
    Get a specific equation by index from a Word document.

    Args:
        filepath: Path to the document
        equation_index: Index of the equation (0-based) in the list returned
            by extract_equations
        include_omml: If True, include the raw OMML XML under "omml"

    Returns:
        Dictionary with "status"; on success also "filepath" and an
        "equation" mapping ("index", "paragraph_index", "type", "latex",
        "context" and optionally "omml"), on failure an "error" (plus
        "error_code" for DocxMcpError, e.g. an out-of-range index)
    """
    logger.info(
        "Getting equation",
        extra={"tool": "get_equation", "filepath": filepath, "index": equation_index},
    )
    try:
        equations = extract_equations(filepath)
        if not equations:
            # Separate message: a "0--1" range would be meaningless here
            raise InvalidParameterError(
                "equation_index",
                f"Index {equation_index} out of range (document contains no equations)",
            )
        if equation_index < 0 or equation_index >= len(equations):
            raise InvalidParameterError(
                "equation_index",
                f"Index {equation_index} out of range (0-{len(equations)-1})",
            )
        eq = equations[equation_index]
        result = {
            "status": "success",
            "filepath": filepath,
            "equation": {
                "index": eq["index"],
                "paragraph_index": eq["paragraph_index"],
                "type": eq["type"],
                "latex": eq["latex"],
                "context": eq["context"],
            },
        }
        if include_omml:
            result["equation"]["omml"] = eq["omml"]
        return result
    except DocxMcpError as e:
        logger.warning(e.message, extra={"tool": "get_equation", "error_code": e.error_code})
        return {"status": "error", "error": e.message, "error_code": e.error_code}
    except Exception as e:
        # Tool boundary: report anything else as an error payload
        logger.error(f"Unexpected error getting equation: {str(e)}")
        return {"status": "error", "error": str(e)}
@app.tool()
def extract_images(
    filepath: str,
    output_dir: Optional[str] = None,
    return_base64: bool = False,
) -> dict[str, Any]:
    """
    Extract all images embedded in a Word document.

    Images are found through the relationships of the main document part.
    Only relationships of the image type whose target is stored inside the
    package are extracted; external targets are ignored. The output
    directory is created only when there is at least one image to write.

    Args:
        filepath: Path to the document
        output_dir: Directory to save extracted images (optional)
        return_base64: If True, return images as base64 encoded strings

    Returns:
        Dictionary with "status"; on success also "filepath", "images"
        (per image: "index", "filename", "content_type", "size_bytes" and,
        when requested, "saved_path" / "base64") and "count", on failure an
        "error" (plus "error_code" for DocxMcpError)
    """
    import base64

    logger.info("Extracting images", extra={"tool": "extract_images", "filepath": filepath})
    try:
        doc = safe_open_document(filepath)
        image_reltype = (
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
        )
        # External relationships have no target part, so they are excluded
        # before target_part is touched.
        image_parts = [
            rel.target_part
            for rel in doc.part.rels.values()
            if not rel.is_external and rel.reltype == image_reltype
        ]
        # Resolve and create the output directory once, not once per image
        out_path = None
        if output_dir and image_parts:
            out_path = normalize_path(output_dir)
            out_path.mkdir(parents=True, exist_ok=True)
        extracted = []
        for image_index, image_part in enumerate(image_parts):
            image_data = image_part.blob
            filename = Path(image_part.partname).name
            image_info = {
                "index": image_index,
                "filename": filename,
                "content_type": image_part.content_type,
                "size_bytes": len(image_data),
            }
            # Save to output directory if specified
            if out_path is not None:
                image_file = out_path / filename
                image_file.write_bytes(image_data)
                image_info["saved_path"] = str(image_file)
            # Return base64 if requested
            if return_base64:
                image_info["base64"] = base64.b64encode(image_data).decode("utf-8")
            extracted.append(image_info)
        logger.info(f"Extracted {len(extracted)} images", extra={"filepath": filepath})
        return {
            "status": "success",
            "filepath": filepath,
            "images": extracted,
            "count": len(extracted),
        }
    except DocxMcpError as e:
        logger.warning(e.message, extra={"tool": "extract_images", "error_code": e.error_code})
        return {"status": "error", "error": e.message, "error_code": e.error_code}
    except Exception as e:
        # Tool boundary: report anything else as an error payload
        logger.error(f"Unexpected error extracting images: {str(e)}")
        return {"status": "error", "error": str(e)}
@app.tool()
def health_check() -> dict[str, Any]:
    """
    Report that the server is up.

    The payload is static; no document or file-system access takes place.

    Returns:
        Dictionary with "status", "service" and "version"
    """
    report = dict(status="healthy", service="docx-mcp", version="0.1.0")
    return report
if __name__ == "__main__":
    # Script entry point: start the FastMCP server over the stdio transport.
    # Nothing is started when the module is merely imported.
    logger.info("Starting docx-mcp server")
    app.run(transport="stdio")