Solr MCP

by allenday
Verified
#!/usr/bin/env python3 """ Script to index documents in Solr with vector embeddings generated using Ollama's nomic-embed-text model. """ import argparse import asyncio import json import os import sys from typing import Dict, List sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from solr_mcp.embeddings.client import OllamaClient from solr_mcp.solr.client import SolrClient async def index_documents(json_file: str, collection: str = "vectors", commit: bool = True): """ Index documents from a JSON file into Solr with vector embeddings. Args: json_file: Path to the JSON file containing documents collection: Solr collection name commit: Whether to commit after indexing """ # Load documents with open(json_file, 'r', encoding='utf-8') as f: documents = json.load(f) # Initialize clients solr_client = SolrClient() # Check if collection exists collections = solr_client.list_collections() if collection not in collections: print(f"Warning: Collection '{collection}' not found in Solr. Available collections: {collections}") response = input("Do you want to continue with the default collection? (y/N): ") if response.lower() != 'y': print("Aborting.") return collection = solr_client.config.default_collection # Index documents with embeddings print(f"Indexing {len(documents)} documents with embeddings...") try: success = await solr_client.batch_index_with_generated_embeddings( documents=documents, collection=collection, commit=commit ) if success: print(f"Successfully indexed {len(documents)} documents in collection '{collection}'") else: print("Indexing failed") except Exception as e: print(f"Error indexing documents: {e}") if __name__ == "__main__": parser = argparse.ArgumentParser(description="Index documents in Solr with vector embeddings") parser.add_argument("json_file", help="Path to the JSON file containing documents") parser.add_argument("--collection", "-c", default="vectors", help="Solr collection name") parser.add_argument("--no-commit", dest="commit", action="store_false", help="Don't commit after indexing") args = parser.parse_args() asyncio.run(index_documents(args.json_file, args.collection, args.commit))