Marcus Local MCP Server

Overview Schema Related Servers Score Discussions

search.py•5.91 KiB

#!/usr/bin/env python3 """ Standalone search script for MCP documentation Called by Next.js API endpoint """ import os import sys import json from pathlib import Path # Add parent directory to path sys.path.insert(0, str(Path(__file__).parent.parent)) from dotenv import load_dotenv import chromadb from chromadb.config import Settings from openai import OpenAI # Load environment load_dotenv() def search_docs(query: str, max_results: int = 5, source_filter: str = None): """ Search documentation using semantic search Args: query: Search query max_results: Maximum number of results source_filter: Optional source name to filter results Returns: JSON string with search results """ try: # Initialize clients db_path = Path(__file__).parent.parent / "data" / "chroma_db" openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) chroma_client = chromadb.PersistentClient( path=str(db_path), settings=Settings(anonymized_telemetry=False) ) # Get collection collection = chroma_client.get_collection(name="documentation") # Load metadata to determine filter field metadata_path = Path(__file__).parent.parent / "data" / "chunks" / "metadata.json" metadata = {} if metadata_path.exists(): with open(metadata_path, 'r') as f: metadata = json.load(f) # Generate embedding for query embedding_model = os.getenv("EMBEDDING_MODEL", "text-embedding-3-small") response = openai_client.embeddings.create( model=embedding_model, input=query ) query_embedding = response.data[0].embedding # Search with optional source filter search_params = { "query_embeddings": [query_embedding], "n_results": max_results } # Add source filter if specified if source_filter: # Determine which field to use for filtering # Documentation sources use "source_name", repositories use "source" filter_field = None # Check metadata to see if this is a documentation source for src in metadata.get("sources", []): if src.get("name") == source_filter: # It's a documentation source, use source_name if src.get("type") == "documentation": filter_field = "source_name" else: # It's a repository, use source filter_field = "source" break # If not found in metadata, try source_name first (for documentation) # then fall back to source (for repositories or URLs) if filter_field is None: # Default to source_name for documentation sources filter_field = "source_name" search_params["where"] = {filter_field: source_filter} results = collection.query(**search_params) # Format results formatted_results = [] if results['documents'][0]: for doc, metadata in zip(results['documents'][0], results['metadatas'][0]): # Check if this is a repository or documentation chunk is_repository = metadata.get('source_type') == 'repository' if is_repository: # Repository chunk - use file path as title formatted_results.append({ 'title': metadata.get('file_path', 'Untitled'), 'url': metadata.get('full_path', ''), 'content': doc, 'metadata': { 'wordCount': len(doc.split()), 'source': metadata.get('source', ''), 'source_name': metadata.get('source', ''), 'source_type': 'repository', 'file_path': metadata.get('file_path', ''), 'full_path': metadata.get('full_path', ''), 'lines': metadata.get('lines', '') } }) else: # Documentation chunk - use page title formatted_results.append({ 'title': metadata.get('title', 'Untitled'), 'url': metadata.get('url', ''), 'content': doc, 'metadata': { 'wordCount': len(doc.split()), 'source': metadata.get('source', ''), 'source_name': metadata.get('source_name', metadata.get('source', '').replace("https://", "").replace("http://", "").split("/")[0]), 'source_type': 'documentation' } }) # Return JSON output = { 'success': True, 'query': query, 'results': formatted_results, 'totalResults': len(formatted_results) } print(json.dumps(output)) return 0 except Exception as e: error_output = { 'success': False, 'error': str(e), 'query': query } print(json.dumps(error_output)) return 1 if __name__ == "__main__": if len(sys.argv) < 2: print(json.dumps({ 'success': False, 'error': 'Usage: search.py <query> [max_results] [source_filter]' })) sys.exit(1) query = sys.argv[1] max_results = int(sys.argv[2]) if len(sys.argv) > 2 else 5 source_filter = sys.argv[3] if len(sys.argv) > 3 else None sys.exit(search_docs(query, max_results, source_filter))

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Marcussy34/localMCP-crawl4ai-RAG'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

search.py•5.91 KiB