find_text_in_document

Locate specific text in Microsoft Word documents by searching with customizable options like case sensitivity and whole word matching.

Instructions

Find occurrences of specific text in a Word document.

Input Schema

Table | JSON Schema

Name	Required	Description	Default
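`filename`	Yes	Path to the Word document.
`text_to_find`	Yes	Text to search for in the document.
`match_case`	No	Whether to match case (`true`) or ignore case (`false`).	`true`
`whole_word`	No	Whether to match whole words only (`true`) or substrings (`false`).	`false`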

Implementation Reference

word_document_server/main.py:379-385 (registration)

Registers the 'find_text_in_document' tool using FastMCP's @mcp.tool() decorator. This wrapper function delegates execution to the implementation in extended_document_tools.

@mcp.tool()
def find_text_in_document(filename: str, text_to_find: str, match_case: bool = True,
                         whole_word: bool = False):
    """Find occurrences of specific text in a Word document."""
    # NOTE(review): the docstring above is kept verbatim because it appears to
    # be what the tool registration publishes as the tool's description.
    #
    # Thin registration shim: all of the real work lives in the handler of the
    # same name in extended_document_tools; its result is returned untouched.
    handler = extended_document_tools.find_text_in_document
    return handler(filename, text_to_find, match_case, whole_word)

word_document_server/tools/extended_document_tools.py:41-64 (handler)

Main handler function that performs input validation, ensures DOCX extension, calls the core find_text utility, formats results as JSON, and handles errors.

async def find_text_in_document(filename: str, text_to_find: str, match_case: bool = True, whole_word: bool = False) -> str:
    """Search a Word document for a piece of text and report the matches.

    Args:
        filename: Path to the Word document; it is passed through
            ensure_docx_extension before being used.
        text_to_find: Text to look for. Must not be empty.
        match_case: True for a case-sensitive search, False to ignore case.
        whole_word: True to match whole words only, False to match substrings.

    Returns:
        The dictionary produced by find_text serialised as indented JSON, or a
        plain-text message when the document is missing, the search text is
        empty, or the search raises.
    """
    docx_path = ensure_docx_extension(filename)

    # Guard clauses: reject bad input before touching the document.
    if not os.path.exists(docx_path):
        return f"Document {docx_path} does not exist"
    if not text_to_find:
        return "Search text cannot be empty"

    try:
        search_results = find_text(docx_path, text_to_find, match_case, whole_word)
        payload = json.dumps(search_results, indent=2)
    except Exception as exc:
        # Any failure is reported to the caller as text rather than raised.
        return f"Failed to search for text: {exc!s}"
    return payload

word_document_server/utils/extended_document_utils.py:42-166 (helper)

Core helper function implementing the text search logic. Searches paragraphs and table cells, supports case-sensitive/insensitive and whole-word/substring matching, collects detailed occurrence information.

def _build_search_pattern(text_to_find: str, match_case: bool, whole_word: bool):
    """Compile the regular expression used to locate ``text_to_find``.

    The search text is always matched literally (it is escaped). For a
    case-insensitive search it is lower-cased here; the text being scanned
    must be lower-cased by the caller as well.
    """
    import re

    needle = text_to_find if match_case else text_to_find.lower()
    literal = re.escape(needle)
    if whole_word:
        # Look-arounds rather than \b: \b needs a word character on one side,
        # so it would reject search text that itself starts or ends with
        # punctuation (e.g. "C++" followed by a space).
        literal = r"(?<!\w)" + literal + r"(?!\w)"
    return re.compile(literal)


def _word_index(text: str, offset: int) -> int:
    """Return the index of the whitespace-delimited word containing ``offset``."""
    words_before = len(text[:offset].split())
    # When the match is glued to preceding characters (e.g. "(word"), the
    # prefix split has already counted the word the match sits in.
    if offset > 0 and not text[offset - 1].isspace():
        words_before -= 1
    return words_before


def _append_occurrences(occurrences: list, location: dict, paragraph_text: str,
                        pattern, match_case: bool, whole_word: bool) -> None:
    """Append one occurrence entry per match of ``pattern`` in a paragraph.

    ``location`` holds the key/value pairs identifying where the paragraph
    lives; it is placed first in every entry, followed by "position" and
    "context".
    """
    haystack = paragraph_text if match_case else paragraph_text.lower()
    # The context is always the start of the paragraph, capped at 100 chars.
    context = paragraph_text[:100] + ("..." if len(paragraph_text) > 100 else "")

    for match in pattern.finditer(haystack):
        position = _word_index(haystack, match.start()) if whole_word else match.start()
        occurrences.append({**location, "position": position, "context": context})


def find_text(doc_path: str, text_to_find: str, match_case: bool = True, whole_word: bool = False) -> Dict[str, Any]:
    """
    Find all occurrences of specific text in a Word document.

    Body paragraphs are searched first, then every paragraph of every cell of
    every table in the document.

    Args:
        doc_path: Path to the Word document
        text_to_find: Text to search for (matched literally)
        match_case: Whether to perform case-sensitive search
        whole_word: Whether to match whole words only. A match counts as a
            whole word when it is not directly preceded or followed by a word
            character, so words next to punctuation ("word," or "(word)") and
            multi-word phrases are found too.

    Returns:
        Dictionary with search results. On success it contains "query",
        "match_case", "whole_word", "occurrences" and "total_count". Each
        occurrence has either "paragraph_index" (body text) or "location"
        (table cell), plus "position" and "context". "position" is the
        character offset of the match for substring searches and the index of
        the whitespace-delimited word containing the match for whole-word
        searches. On failure the dictionary contains a single "error" key.
    """
    import os
    if not os.path.exists(doc_path):
        return {"error": f"Document {doc_path} does not exist"}

    if not text_to_find:
        return {"error": "Search text cannot be empty"}

    try:
        doc = Document(doc_path)
        # Compile once; the same pattern is reused for every paragraph.
        pattern = _build_search_pattern(text_to_find, match_case, whole_word)
        occurrences = []

        # Search in paragraphs
        for para_index, para in enumerate(doc.paragraphs):
            _append_occurrences(
                occurrences, {"paragraph_index": para_index}, para.text,
                pattern, match_case, whole_word,
            )

        # Search in tables
        for table_idx, table in enumerate(doc.tables):
            for row_idx, row in enumerate(table.rows):
                for col_idx, cell in enumerate(row.cells):
                    location = {"location": f"Table {table_idx}, Row {row_idx}, Column {col_idx}"}
                    for para in cell.paragraphs:
                        _append_occurrences(
                            occurrences, location, para.text,
                            pattern, match_case, whole_word,
                        )

        return {
            "query": text_to_find,
            "match_case": match_case,
            "whole_word": whole_word,
            "occurrences": occurrences,
            "total_count": len(occurrences),
        }
    except Exception as e:
        # Callers expect an error dictionary rather than an exception.
        return {"error": f"Failed to search for text: {str(e)}"}

Office Word MCP Server