#!/usr/bin/env python
"""
Script to download Move files from GitHub and index them using the MCP server.
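
Example usage (values shown are illustrative):
    python download_move_files.py --query "use sui" --max-results 50 --token "$GITHUB_TOKEN"
    python download_move_files.py --use-api --server-url http://localhost:8000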
"""
import os
import sys
import argparse
import logging
import requests
from dotenv import load_dotenv
from mcp_server.utils.github_extractor import extract_and_index_move_files
from mcp_server.utils.document_processor import DocumentProcessor
from mcp_server.models.vector_store import FAISSVectorStore
from mcp_server.index_move_files import index_move_files
# Configure logging
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Load environment variables
load_dotenv()
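# Programmatic helper: performs the download + indexing flow in a single call.
# main() below implements the same steps with CLI arguments and can optionally
# delegate indexing to a running MCP server instead.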
def download_and_index_move_files(query="use sui",
output_dir="docs/move_files",
index_file="data/faiss_index.bin",
github_token=None,
use_scraping=True,
max_results=100,
merge_with_existing=True):
"""
Download Move files from GitHub and index them in FAISS.
Args:
query: GitHub search query
output_dir: Directory to save downloaded files
index_file: Path to save/load FAISS index
github_token: GitHub API token
use_scraping: Whether to use web scraping as fallback
max_results: Maximum number of files to download
merge_with_existing: Whether to merge with existing index
Returns:
Number of files indexed
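
    Example (illustrative values):
        download_and_index_move_files(query="use sui", max_results=25,
                                      github_token=os.getenv("GITHUB_TOKEN"))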
"""
# Extract Move files from GitHub
num_files, file_paths = extract_and_index_move_files(
query=query,
output_dir=output_dir,
github_token=github_token,
use_scraping=use_scraping,
max_results=max_results
)
if num_files == 0:
logger.warning("No Move files found. Check your search query and GitHub token.")
return 0
logger.info(f"Downloaded {num_files} Move files to {output_dir}")
# Initialize document processor and vector store
doc_processor = DocumentProcessor()
vector_store = FAISSVectorStore()
# Load existing index if it exists and merge_with_existing is True
if os.path.exists(index_file) and merge_with_existing:
logger.info(f"Loading existing index from {index_file}")
try:
vector_store.load(index_file)
logger.info(f"Loaded existing index with {len(vector_store.documents)} documents")
except Exception as e:
logger.error(f"Error loading existing index: {str(e)}")
logger.info("Will create a new index instead")
# Process document directory
logger.info(f"Processing documents from {output_dir}")
documents = doc_processor.process_documents(output_dir)
if not documents:
logger.warning("No documents processed. Check the file formats and content.")
return 0
# Index documents
logger.info(f"Indexing {len(documents)} document chunks")
vector_store.index_documents(documents)
# Save index
os.makedirs(os.path.dirname(index_file), exist_ok=True)
vector_store.save(index_file)
logger.info(f"Index saved to {index_file} with {len(vector_store.documents)} total documents")
return len(documents)
def check_mcp_server(server_url="http://localhost:8000"):
    """Check whether the MCP server is reachable by probing its /docs endpoint."""
    try:
        response = requests.get(f"{server_url}/docs", timeout=5)
        return response.status_code == 200
    except requests.RequestException:
        return False
def main():
"""Main function to parse arguments and run the script"""
parser = argparse.ArgumentParser(
description="Download Move files from GitHub and index them for the MCP Server"
)
parser.add_argument(
"--query",
default="use sui",
help="GitHub search query (default: 'use sui')"
)
parser.add_argument(
"--output-dir",
default="docs/move_files",
help="Directory to save downloaded files (default: docs/move_files)"
)
parser.add_argument(
"--index-file",
default="data/faiss_index.bin",
help="Path to save/load FAISS index (default: data/faiss_index.bin)"
)
parser.add_argument(
"--token",
default=os.getenv("GITHUB_TOKEN"),
help="GitHub personal access token (default: from GITHUB_TOKEN env var)"
)
parser.add_argument(
"--no-scraping",
action="store_true",
help="Disable web scraping fallback, use only GitHub API"
)
parser.add_argument(
"--max-results",
type=int,
default=100,
help="Maximum number of files to download (default: 100)"
)
parser.add_argument(
"--new-index",
action="store_true",
help="Create a new index instead of merging with existing one"
)
parser.add_argument(
"--server-url",
default="http://localhost:8000",
help="MCP Server URL if already running (default: http://localhost:8000)"
)
parser.add_argument(
"--use-api",
action="store_true",
help="Use MCP server API for indexing if server is running"
)
parser.add_argument(
"--verbose",
action="store_true",
help="Enable verbose logging"
)
args = parser.parse_args()
# Set logging level
if args.verbose:
logging.getLogger().setLevel(logging.DEBUG)
# Create output directory if it doesn't exist
os.makedirs(args.output_dir, exist_ok=True)
# Create data directory for index if needed
os.makedirs(os.path.dirname(args.index_file), exist_ok=True)
# Check if server is running and we should use the API
server_running = check_mcp_server(args.server_url) if args.use_api else False
# Step 1: Download files from GitHub first
num_files, file_paths = extract_and_index_move_files(
query=args.query,
output_dir=args.output_dir,
github_token=args.token,
use_scraping=not args.no_scraping,
max_results=args.max_results
)
if num_files == 0:
logger.warning("No Move files found. Check your search query and GitHub token.")
return 1
logger.info(f"Downloaded {num_files} Move files to {args.output_dir}")
# Step 2: Index the files
if server_running and args.use_api:
logger.info(f"MCP Server is running at {args.server_url}, using API for indexing")
# Call the server's index endpoint with the directory
try:
response = requests.post(
f"{args.server_url}/index",
params={"directory_path": args.output_dir}
)
            if response.status_code == 200:
                result = response.json()
                logger.info(f"API indexing successful: {result.get('message', result)}")
else:
logger.error(f"API indexing failed: {response.status_code} - {response.text}")
# Fall back to local indexing if API fails
logger.info("Falling back to local indexing...")
num_indexed = index_move_files(docs_dir=args.output_dir, index_file=args.index_file)
logger.info(f"Indexed {num_indexed} document chunks locally")
except Exception as e:
logger.error(f"Error using API for indexing: {str(e)}")
# Fall back to local indexing if API fails
logger.info("Falling back to local indexing...")
num_indexed = index_move_files(docs_dir=args.output_dir, index_file=args.index_file)
logger.info(f"Indexed {num_indexed} document chunks locally")
else:
# Do local processing
logger.info("Using local processing for indexing")
num_indexed = index_move_files(docs_dir=args.output_dir, index_file=args.index_file)
logger.info(f"Indexed {num_indexed} document chunks to the index")
    # Verify that the local index file can be loaded. When indexing was done through
    # the server API, the local index file may be missing or stale, so a failure here
    # is logged but not treated as fatal.
try:
vector_store = FAISSVectorStore()
vector_store.load(args.index_file)
logger.info(f"Successfully loaded index with {len(vector_store.documents)} documents")
except Exception as e:
logger.error(f"Error loading index after building: {str(e)}")
logger.info("Processing complete")
return 0
if __name__ == "__main__":
    sys.exit(main())