reindex_documents
Rebuild document indexes for the knowledge base to update search accuracy after content changes or model updates.
Instructions
Index or reindex all documents in the knowledge base.
Args:
force: If True, smart reindex (detects changes + rebuilds BM25). FAST.
full_rebuild: If True, nuclear rebuild (deletes everything, re-embeds ALL). Use if model changed.
Returns:
JSON string with indexing statistics

Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| force | No | If True, smart reindex (detects changes + rebuilds BM25). Fast. | false |
| full_rebuild | No | If True, nuclear rebuild (deletes everything, re-embeds ALL documents). Use if the embedding model changed. | false |
Implementation Reference
- mcp_server/server.py:1363-1387 (registration) — MCP tool registration for reindex_documents, which delegates to KnowledgeOrchestrator methods.
def reindex_documents(force: bool = False, full_rebuild: bool = False) -> str: """ Index or reindex all documents in the knowledge base. Args: force: If True, smart reindex (detects changes + rebuilds BM25). FAST. full_rebuild: If True, nuclear rebuild (deletes everything, re-embeds ALL). Use if model changed. Returns: JSON string with indexing statistics """ orchestrator = get_orchestrator() if full_rebuild: stats = orchestrator.nuclear_rebuild() operation = "nuclear_rebuild" elif force: stats = orchestrator.reindex_all() operation = "smart_reindex" else: stats = orchestrator.index_all() operation = "incremental_index" return json.dumps({"status": "success", "operation": operation, "stats": stats}, indent=2, ensure_ascii=False) - mcp_server/server.py:632-667 (handler)Implementation of the 'smart reindex' logic (triggered by reindex_documents(force=True)).
def reindex_all(self) -> Dict[str, Any]: """Smart reindex: incremental detection + BM25 rebuild + orphan cleanup.""" import shutil print("[REINDEX] Starting smart incremental reindex...") start_time = time.time() stats = self.index_all(force=False) print("[REINDEX] Rebuilding BM25 index...") self.bm25_index.clear() self._bm25_initialized = False self._ensure_bm25_index() chroma_dir = config.chroma_dir orphans_cleaned = 0 if chroma_dir.exists(): for item in chroma_dir.iterdir(): if item.is_dir() and len(item.name) == 36 and '-' in item.name: try: if not any(item.iterdir()): shutil.rmtree(item) orphans_cleaned += 1 except Exception: pass self.query_cache.invalidate() elapsed = time.time() - start_time stats["orphan_folders_cleaned"] = orphans_cleaned stats["elapsed_seconds"] = round(elapsed, 2) print(f"[REINDEX] Completed in {elapsed:.1f}s " f"(indexed: {stats['indexed']}, updated: {stats['updated']}, " f"skipped: {stats['skipped']}, deleted: {stats['deleted']})") return stats - mcp_server/server.py:669-713 (handler)Implementation of the 'nuclear rebuild' logic (triggered by reindex_documents(full_rebuild=True)).
def nuclear_rebuild(self) -> Dict[str, Any]: """Nuclear rebuild: DELETE everything and re-embed ALL documents.""" import shutil print("[NUCLEAR] Starting full rebuild...") start_time = time.time() try: self.chroma_client.delete_collection(config.collection_name) print("[NUCLEAR] Deleted ChromaDB collection") except Exception: pass chroma_dir = config.chroma_dir if chroma_dir.exists(): for item in chroma_dir.iterdir(): if item.is_dir() and len(item.name) == 36 and '-' in item.name: try: shutil.rmtree(item) except Exception: pass self.collection = self.chroma_client.get_or_create_collection( name=config.collection_name, embedding_function=self.embed_fn, metadata={"description": "Knowledge base for RAG"} ) self._indexed_docs = {} self.bm25_index.clear() self._bm25_initialized = False self._chunk_hashes = {} self.query_cache.invalidate() stats = self.index_all(force=True) self.bm25_index.build_index() self._bm25_initialized = True elapsed = time.time() - start_time stats["elapsed_seconds"] = round(elapsed, 2) print(f"[NUCLEAR] Full rebuild completed in {elapsed:.1f}s " f"({stats['indexed']} docs, {stats['chunks_added']} chunks)") return stats