load_document

Load documents into DocNav-MCP for intelligent navigation and analysis, generating a document ID for structured access.

Instructions

Load a document for navigation and analysis.

Args: `file_path` – path to the document file. Returns: a success message containing the auto-generated document ID.

Input Schema

Table | JSON Schema

Name	Required	Description	Default
`file_path`	Yes	Path to the document file	

Implementation Reference

server.py:24-52 (handler)
The primary handler for the 'load_document' MCP tool. Resolves the file path, validates existence, loads the document using DocumentNavigator.load_document_from_file_sync, retrieves metadata, and returns a formatted success message with the document ID.
@mcp.tool()
def load_document(file_path: str) -> str:
    """Load a document for navigation and analysis.

    Args:
        file_path: Path to the document file.

    Returns:
        A success message containing the auto-generated document ID. On
        failure (missing path, path that is not a regular file, or any error
        raised while loading) a message starting with "Error" is returned
        instead of raising, so the tool always answers with text.
    """
    try:
        path = Path(file_path).resolve()
        if not path.exists():
            return f"Error: File not found: {file_path}"
        if not path.is_file():
            # A directory passes exists() but can never be loaded; reject it
            # here with a clear message instead of failing inside the loader.
            return f"Error: Not a file: {file_path}"

        # Use the synchronous version to avoid event loop conflicts.
        # Only the ID is needed here; the Document itself stays in the navigator.
        doc_id, _ = navigator.load_document_from_file_sync(path)
        metadata = navigator.get_document_metadata(doc_id)
        doc_format = metadata["format"] if metadata else "unknown"

        return (
            f"Document loaded successfully!\n"
            f"File: {path.name}\n"
            f"Document ID: {doc_id}\n"
            f"Format: {doc_format}\n"
            f"Use get_outline('{doc_id}') to see document structure."
        )
    except Exception as e:
        # Tool boundary: report every failure as text rather than propagating.
        return f"Error loading document: {e}"
docnav/navigator.py:580-632 (helper)
Helper method invoked by the tool handler to load the document synchronously from file. Handles both async and pure sync contexts, finds appropriate processor, processes the file, generates doc_id, stores document and metadata.
def load_document_from_file_sync(self, file_path: Path) -> Tuple[str, Document]:
    """Load document from file (synchronous version).

    When an event loop is already running in this thread, ``asyncio.run``
    cannot be used, so the file is loaded through
    ``_load_file_fallback_sync``. Otherwise the matching processor is run
    with ``asyncio.run``; if that fails, the sync fallback is tried once.

    Args:
        file_path: Path to the document file.

    Returns:
        Tuple of (doc_id, Document) where doc_id is auto-generated.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If loading fails. The message includes the processor
            error (when the processor was attempted) and the fallback error.
    """
    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    # Normalize path to prevent injection issues
    normalized_path = self._normalize_file_path(file_path)

    import asyncio

    # Probe for a running loop with nothing else inside the try, so that a
    # RuntimeError raised by the actual loading code is never mistaken for
    # "no running event loop".
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_is_running = False
    else:
        loop_is_running = True

    primary_error = None
    if not loop_is_running:
        try:
            processor = self._find_processor(file_path)
            document = asyncio.run(processor.process(file_path))

            doc_id = self._generate_doc_id()
            # Build the metadata before registering anything so a failure
            # here cannot leave a document stored without its metadata.
            metadata = {
                "title": file_path.name,
                "format": document.source_format,
                "source_type": "file",
                "file_path": normalized_path,
                # NOTE(review): uuid1().time is a count of 100-ns intervals
                # since 1582-10-15, not a Unix timestamp - confirm consumers
                # treat this value as opaque.
                "created_at": str(uuid.uuid1().time),
            }
            self.loaded_documents[doc_id] = document
            self.document_metadata[doc_id] = metadata
            return doc_id, document
        except Exception as e:
            # Remember the processor failure and try the sync fallback below.
            primary_error = e

    # Reached when a loop is running or when the processor path failed.
    # The fallback is attempted exactly once.
    try:
        return self._load_file_fallback_sync(file_path)
    except Exception as fallback_error:
        if primary_error is None:
            raise ValueError(
                f"Error loading document: {fallback_error}"
            ) from fallback_error
        raise ValueError(
            f"Error loading document: {primary_error}. "
            f"Fallback also failed: {fallback_error}"
        ) from fallback_error
docnav/navigator.py:633-724 (helper)
Fallback synchronous loader for files. PDF files are converted to Markdown with pymupdf4llm and parsed with the Markdown processor; all other files are read as UTF-8 text and passed to the synchronous text loader. In both cases the resulting document and its metadata are stored.
def _load_file_fallback_sync(self, file_path: Path) -> Tuple[str, Document]:
    """Fallback sync file loading for when async processors can't be used.

    PDF files are converted to markdown with pymupdf4llm and parsed with the
    markdown processor's tree parser. Every other file is read as UTF-8 text
    and handed to ``load_document_from_text_sync``; ``.xml`` is loaded as
    "xml" and any other suffix as "markdown".

    Args:
        file_path: Path to the document file.

    Returns:
        Tuple of (doc_id, Document).

    Raises:
        ValueError: If pymupdf4llm is not installed or PDF processing fails.
            Errors from the text path (e.g. decoding errors) propagate as
            raised.
    """
    normalized_path = self._normalize_file_path(file_path)

    # Handle PDF files directly with pymupdf4llm (which is actually sync)
    if file_path.suffix.lower() == ".pdf":
        # Keep this try limited to the optional dependency so a failing
        # project-local import is not misreported as pymupdf4llm missing.
        try:
            import pymupdf4llm
        except ImportError as e:
            raise ValueError(
                "pymupdf4llm is required for PDF processing but not available"
            ) from e

        try:
            from .models import Document
            from .processors.markdown import MarkdownProcessor

            # Convert PDF to markdown using pymupdf4llm (this is actually synchronous)
            markdown_content = pymupdf4llm.to_markdown(str(file_path))

            document = Document(
                file_path=file_path,
                title=file_path.stem,
                source_text=markdown_content,
                source_format="pdf",
            )

            # Parse the converted text in memory with the processor's
            # internal tree parser; no temporary file is needed because the
            # parser takes the markdown string itself.
            md_processor = MarkdownProcessor()
            document.root = md_processor._parse_markdown_to_tree(markdown_content)
            document.rebuild_index()

            # Generate doc ID and store
            doc_id = self._generate_doc_id()
            self.loaded_documents[doc_id] = document
            self.document_metadata[doc_id] = {
                "title": file_path.name,
                "format": "pdf",
                "source_type": "file",
                "file_path": normalized_path,
                # NOTE(review): uuid1().time is a count of 100-ns intervals
                # since 1582-10-15, not a Unix timestamp - confirm consumers
                # treat this value as opaque.
                "created_at": str(uuid.uuid1().time),
            }
            return doc_id, document
        except Exception as e:
            raise ValueError(f"Error processing PDF file: {e}") from e

    # For markdown and other text files
    content = file_path.read_text(encoding="utf-8")

    format_map = {
        ".md": "markdown",
        ".markdown": "markdown",
        ".xml": "xml",
    }
    # Unknown suffixes are treated as markdown.
    file_format = format_map.get(file_path.suffix.lower(), "markdown")

    # Use the sync text loading method
    doc_id, document = self.load_document_from_text_sync(
        content, file_format, file_path.stem
    )

    # Update metadata to reflect file source
    self.document_metadata[doc_id].update(
        {
            "source_type": "file",
            "file_path": normalized_path,
        }
    )
    return doc_id, document
server.py:24-24 (registration)
The @mcp.tool() decorator registers the load_document function as an MCP tool.
@mcp.tool()

DocNav-MCP

load_document

Instructions

Input Schema

Implementation Reference

Other Tools

Latest Blog Posts

MCP directory API