
MCP RAG Server

index_move_files.py (4.13 kB)
```python
#!/usr/bin/env python
"""Script to index Move files that are already in the docs directory."""
import os
import logging
import argparse

import numpy as np

from mcp_server.utils.document_processor import DocumentProcessor
from mcp_server.models.vector_store import FAISSVectorStore

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def index_move_files(docs_dir="docs/move_files", index_file="data/faiss_index.bin"):
    """
    Index Move files from a local directory.

    Args:
        docs_dir: Directory containing Move files
        index_file: Path to save the FAISS index

    Returns:
        Number of document chunks indexed
    """
    # Check that the docs directory exists
    if not os.path.exists(docs_dir):
        logger.error(f"Directory not found: {docs_dir}")
        return 0

    # Initialize the document processor
    logger.info("Initializing document processor and vector store")
    doc_processor = DocumentProcessor()

    # Probe the embedding dimension of the model with a throwaway string
    test_embedding = doc_processor.get_embedding("This is a test")
    embed_dim = test_embedding.shape[0]
    logger.info(f"Embedding dimension of the model: {embed_dim}")

    # Initialize the vector store with the correct dimension
    vector_store = FAISSVectorStore(dimension=embed_dim)

    # Load the existing index if there is one; new documents are appended to it
    if os.path.exists(index_file):
        logger.info(f"Loading existing index from {index_file}")
        try:
            vector_store.load(index_file)
            logger.info(f"Loaded existing index with {len(vector_store.documents)} documents")
        except Exception as e:
            logger.error(f"Error loading existing index: {str(e)}")
            logger.info("Will create a new index instead")

    # Process the document directory into embedded chunks
    logger.info(f"Processing documents from {docs_dir}")
    documents = doc_processor.process_documents(docs_dir)

    if not documents:
        logger.warning("No documents processed. Check the file formats and content.")
        return 0

    # Sanity-check the dimensions of the document embeddings
    sample_embedding = np.array(documents[0]['embedding'])
    logger.info(f"Sample document embedding dimension: {sample_embedding.shape}")

    # Warn if the embeddings do not all share one dimension
    dims = [np.asarray(doc['embedding']).shape[0] for doc in documents]
    if len(set(dims)) > 1:
        logger.warning(f"Different embedding dimensions found: {set(dims)}")

    # Index the documents
    logger.info(f"Indexing {len(documents)} document chunks")
    vector_store.index_documents(documents)

    # Save the index; guard against an index path with no directory component
    index_dir = os.path.dirname(index_file)
    if index_dir:
        os.makedirs(index_dir, exist_ok=True)
    vector_store.save(index_file)
    logger.info(f"Index saved to {index_file} with {len(vector_store.documents)} total documents")

    return len(documents)


def main():
    """Entry point for command-line execution and pipx."""
    parser = argparse.ArgumentParser(description="Index Move files for the MCP Server")
    parser.add_argument("--docs-dir", default="docs/move_files",
                        help="Directory containing Move files (default: docs/move_files)")
    parser.add_argument("--index-file", default="data/faiss_index.bin",
                        help="Path to save the FAISS index (default: data/faiss_index.bin)")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
    args = parser.parse_args()

    # Set the logging level
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Create the working directories if they don't exist
    index_dir = os.path.dirname(args.index_file)
    if index_dir:
        os.makedirs(index_dir, exist_ok=True)
    os.makedirs(args.docs_dir, exist_ok=True)

    num_indexed = index_move_files(docs_dir=args.docs_dir, index_file=args.index_file)
    print(f"Indexed {num_indexed} document chunks from Move files")
    return num_indexed


if __name__ == "__main__":
    main()
```
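Usage: with the defaults, `python index_move_files.py --verbose` indexes everything under `docs/move_files` into `data/faiss_index.bin`. Note that the script appends to an existing index without deduplicating, so re-running it over unchanged files adds the same chunks again.

The script leans on two repo classes not shown on this page, `DocumentProcessor` and `FAISSVectorStore`. As a rough sketch of the store side only (this is not the repo's implementation; the `{'text': ..., 'embedding': ...}` chunk shape, the `IndexFlatL2` choice, and the pickle sidecar for metadata are all assumptions), a FAISS-backed store exposing the same `index_documents`/`search`/`save`/`load`/`documents` surface could look like this:

```python
import pickle

import faiss
import numpy as np


class SketchVectorStore:
    """Illustrative FAISS-backed store; not the repo's FAISSVectorStore."""

    def __init__(self, dimension):
        self.dimension = dimension
        self.index = faiss.IndexFlatL2(dimension)  # exact L2 search, no training needed
        self.documents = []  # chunk dicts, kept aligned with the index rows

    def index_documents(self, documents):
        # FAISS expects a contiguous float32 matrix of shape (n, dimension)
        vectors = np.vstack(
            [np.asarray(d['embedding'], dtype='float32') for d in documents]
        )
        self.index.add(vectors)
        self.documents.extend(documents)

    def search(self, query_embedding, k=5):
        # Return the k stored chunks closest to the query embedding
        query = np.asarray(query_embedding, dtype='float32').reshape(1, -1)
        _, ids = self.index.search(query, k)
        return [self.documents[i] for i in ids[0] if i != -1]

    def save(self, path):
        faiss.write_index(self.index, path)
        # The chunk metadata isn't part of the FAISS file, so persist it alongside
        with open(path + '.meta', 'wb') as f:
            pickle.dump(self.documents, f)

    def load(self, path):
        self.index = faiss.read_index(path)
        with open(path + '.meta', 'rb') as f:
            self.documents = pickle.load(f)
```

One design point the script's dimension probe highlights: a FAISS index is fixed to a single vector dimension at creation, which is why the script embeds a test string first and only then constructs the store.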
