remove_document
Delete documents from the knowledge base index, optionally removing files from disk to manage stored information.
Instructions
Remove a document from the knowledge base index.
Args:
filepath: Path to the document file
delete_file: If True, also delete the file from disk (default: False)
Returns:
JSON string with removal results
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| filepath | Yes | Path to the document file | |
| delete_file | No | If True, also delete the file from disk | False |
Implementation Reference
- mcp_server/server.py:1480-1500 (handler) MCP tool registration for "remove_document".
def remove_document(filepath: str, delete_file: bool = False) -> str:
    """
    Drop a document from the knowledge base index and report the outcome as JSON.

    Args:
        filepath: Path to the document file
        delete_file: If True, also delete the file from disk (default: False)

    Returns:
        JSON string: ``{"status": "error", "message": ...}`` when the path is
        empty or the orchestrator reports an error, otherwise the
        orchestrator's result merged under ``"status": "success"``.
    """
    # Guard clause: an empty path never reaches the orchestrator.
    if not filepath:
        return json.dumps({"status": "error", "message": "Filepath required"})

    outcome = get_orchestrator().remove_document_by_path(
        filepath, delete_file=delete_file
    )

    if "error" in outcome:
        failure = {"status": "error", "message": outcome["error"]}
        return json.dumps(failure)

    # Start from the status entry so it stays first; keys from the
    # orchestrator result are layered on top of it.
    payload = {"status": "success"}
    payload.update(outcome)
    return json.dumps(payload, indent=2)

# - mcp_server/server.py:1041-1067 (handler) Logic implementation for "remove_document" in KnowledgeOrchestrator class.
def remove_document_by_path(self, filepath: str, delete_file: bool = False) -> Dict[str, Any]:
    """Remove a document from the index. Optionally delete from disk.

    The document is located by comparing the resolved form of ``filepath``
    with the resolved ``source`` path of each entry in ``self._indexed_docs``.

    Args:
        filepath: Path of the document to remove.
        delete_file: If True, also try to delete the file from disk. A failed
            deletion only prints a warning; the document is still removed
            from the index.

    Returns:
        ``{"error": ...}`` when no indexed document matches the path.
        Otherwise a dict with ``chunks_removed``, ``filepath`` (resolved) and
        ``file_deleted``, which is True only when this call actually removed
        the file from disk.
    """
    filepath_resolved = str(Path(filepath).resolve())

    doc_id = None
    for did, info in self._indexed_docs.items():
        source = info.get("source", "")
        if not source:
            # Path("").resolve() is the current working directory, so an
            # entry without a source must never be treated as a match.
            continue
        if str(Path(source).resolve()) == filepath_resolved:
            doc_id = did
            break

    # Compare against None explicitly so a falsy-but-valid id is not
    # mistaken for "not found".
    if doc_id is None:
        return {"error": f"Document not found in index: {filepath}"}

    chunks_removed = self._remove_document_chunks(doc_id)
    del self._indexed_docs[doc_id]

    file_deleted = False
    if delete_file:
        try:
            # Unlink the path as given (not the resolved one) so that a
            # symlink is removed rather than its target.
            Path(filepath).unlink()
            file_deleted = True
        except FileNotFoundError:
            # Nothing on disk to delete; best-effort, not an error.
            pass
        except OSError as e:
            print(f"[WARN] Failed to delete file {filepath}: {e}")

    self._save_metadata()
    self.query_cache.invalidate()

    return {
        "chunks_removed": chunks_removed,
        "filepath": filepath_resolved,
        "file_deleted": file_deleted,
    }

# - mcp_server/server.py:592-612 (helper) Helper function to remove individual document chunks from storage.
def _remove_document_chunks(self, doc_id: str) -> int: """Remove all chunks belonging to a document from ChromaDB and BM25.""" try: results = self.collection.get( where={"doc_id": doc_id}, include=["metadatas"] ) if results["ids"]: for meta in results["metadatas"]: content_hash = meta.get("content_hash", "") if content_hash and content_hash in self._chunk_hashes: del self._chunk_hashes[content_hash] self.collection.delete(ids=results["ids"]) self._bm25_initialized = False return len(results["ids"]) except Exception as e: print(f"[WARN] Failed to remove chunks for doc {doc_id}: {e}") return 0