Skip to main content
Glama

reindex_documents

Rebuild document indexes for the knowledge base to update search accuracy after content changes or model updates.

Instructions

Index or reindex all documents in the knowledge base.

Args:
    force: If True, smart reindex (detects changes + rebuilds BM25). FAST.
    full_rebuild: If True, nuclear rebuild (deletes everything, re-embeds ALL). Use if model changed.

Returns:
    JSON string with indexing statistics

Input Schema

TableJSON Schema
Name | Required | Description | Default
force | No | If True, smart reindex (detects changes + rebuilds BM25). | false
full_rebuild | No | If True, nuclear rebuild (deletes everything, re-embeds ALL). | false

Implementation Reference

  • MCP tool registration for reindex_documents which delegates to KnowledgeOrchestrator methods.
    def reindex_documents(force: bool = False, full_rebuild: bool = False) -> str:
        """
        Index or reindex all documents in the knowledge base.
    
        Args:
            force: If True, smart reindex (detects changes + rebuilds BM25). FAST.
            full_rebuild: If True, nuclear rebuild (deletes everything, re-embeds ALL). Use if model changed.
    
        Returns:
            JSON string with indexing statistics
        """
        orchestrator = get_orchestrator()
    
        # Dispatch to the appropriate strategy, checking the most destructive flag first
        # so full_rebuild wins when both flags are set.
        if full_rebuild:
            operation, stats = "nuclear_rebuild", orchestrator.nuclear_rebuild()
        elif force:
            operation, stats = "smart_reindex", orchestrator.reindex_all()
        else:
            operation, stats = "incremental_index", orchestrator.index_all()
    
        payload = {"status": "success", "operation": operation, "stats": stats}
        return json.dumps(payload, indent=2, ensure_ascii=False)
  • Implementation of the 'smart reindex' logic (triggered by reindex_documents(force=True)).
    def reindex_all(self) -> Dict[str, Any]:
        """Smart reindex: incremental change detection, BM25 rebuild, and orphan cleanup."""
        import shutil
    
        print("[REINDEX] Starting smart incremental reindex...")
        started = time.time()
    
        # Incremental pass: only new/changed/removed documents are touched.
        stats = self.index_all(force=False)
    
        # Rebuild BM25 from scratch so entries for deleted documents drop out.
        print("[REINDEX] Rebuilding BM25 index...")
        self.bm25_index.clear()
        self._bm25_initialized = False
        self._ensure_bm25_index()
    
        # Sweep the Chroma directory for empty UUID-named folders (36 chars with
        # dashes) left behind by deleted collections. Best-effort: a folder the
        # OS refuses to remove is not fatal.
        orphans_cleaned = 0
        chroma_dir = config.chroma_dir
        if chroma_dir.exists():
            for entry in chroma_dir.iterdir():
                looks_like_uuid = entry.is_dir() and len(entry.name) == 36 and '-' in entry.name
                if not looks_like_uuid:
                    continue
                try:
                    if not any(entry.iterdir()):
                        shutil.rmtree(entry)
                        orphans_cleaned += 1
                except Exception:
                    pass
    
        # Cached query results may now be stale; drop them all.
        self.query_cache.invalidate()
    
        elapsed = time.time() - started
        stats["orphan_folders_cleaned"] = orphans_cleaned
        stats["elapsed_seconds"] = round(elapsed, 2)
        print(f"[REINDEX] Completed in {elapsed:.1f}s "
              f"(indexed: {stats['indexed']}, updated: {stats['updated']}, "
              f"skipped: {stats['skipped']}, deleted: {stats['deleted']})")
    
        return stats
  • Implementation of the 'nuclear rebuild' logic (triggered by reindex_documents(full_rebuild=True)).
    def nuclear_rebuild(self) -> Dict[str, Any]:
        """Nuclear rebuild: DELETE everything and re-embed ALL documents."""
        import shutil
    
        print("[NUCLEAR] Starting full rebuild...")
        started = time.time()
    
        # Drop the ChromaDB collection; ignore failure if it never existed.
        try:
            self.chroma_client.delete_collection(config.collection_name)
            print("[NUCLEAR] Deleted ChromaDB collection")
        except Exception:
            pass
    
        # Wipe every UUID-named collection folder on disk, empty or not.
        # Best-effort: skip anything the OS will not release.
        chroma_dir = config.chroma_dir
        if chroma_dir.exists():
            for entry in chroma_dir.iterdir():
                if entry.is_dir() and len(entry.name) == 36 and '-' in entry.name:
                    try:
                        shutil.rmtree(entry)
                    except Exception:
                        pass
    
        # Recreate an empty collection and reset all in-memory indexing state.
        self.collection = self.chroma_client.get_or_create_collection(
            name=config.collection_name,
            embedding_function=self.embed_fn,
            metadata={"description": "Knowledge base for RAG"}
        )
        self._indexed_docs = {}
        self.bm25_index.clear()
        self._bm25_initialized = False
        self._chunk_hashes = {}
        self.query_cache.invalidate()
    
        # Re-embed everything from scratch, then rebuild BM25 once at the end.
        stats = self.index_all(force=True)
        self.bm25_index.build_index()
        self._bm25_initialized = True
    
        elapsed = time.time() - started
        stats["elapsed_seconds"] = round(elapsed, 2)
        print(f"[NUCLEAR] Full rebuild completed in {elapsed:.1f}s "
              f"({stats['indexed']} docs, {stats['chunks_added']} chunks)")
    
        return stats

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/lyonzin/knowledge-rag'

If you have feedback or need assistance with the MCP directory API, please join our Discord server