Skip to main content
Glama

Embeddings Searcher

by thypon
mcp_server.py (11.3 kB)
#!/usr/bin/env python3
"""MCP Server for embeddings-based documentation search.

Provides tools for Cursor/Claude to query implementation documentation.
"""

import argparse
import asyncio
from typing import Any, Dict, List

from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import (
    Resource,
    Tool,
    TextContent
)

# Import our documentation searcher
from embeddings_searcher import DocumentationSearcher


class DocumentationMCPServer:
    """Bridges a DocumentationSearcher to MCP clients over stdio.

    Registers MCP resources and tools (search_docs, list_repos, get_stats,
    get_document) and delegates the actual work to DocumentationSearcher.
    """

    def __init__(self, kb_path: str, docs_db_path: str = "embeddings_docs.db",
                 model_name: str = "all-MiniLM-L6-v2"):
        """Create the MCP server and the underlying searcher.

        Args:
            kb_path: Path to the knowledge base (repos directory root).
            docs_db_path: Path to the embeddings SQLite database.
            model_name: Sentence-transformer model used for embeddings.
        """
        self.server = Server("documentation-searcher")
        self.searcher = DocumentationSearcher(kb_path, docs_db_path, model_name)
        self.setup_handlers()

    def setup_handlers(self):
        """Setup MCP protocol handlers."""

        @self.server.list_resources()
        async def list_resources() -> List[Resource]:
            """List available documentation resources."""
            return [
                Resource(
                    uri="docs://search",
                    name="Documentation Search",
                    description="Search through markdown documentation using semantic similarity",
                    mimeType="text/plain"
                )
            ]

        @self.server.read_resource()
        async def read_resource(uri: str) -> str:
            """Read resource content."""
            if uri == "docs://search":
                return "Use the search_docs tool to query documentation with semantic search."
            else:
                raise ValueError(f"Unknown resource: {uri}")

        @self.server.list_tools()
        async def list_tools() -> List[Tool]:
            """List available tools."""
            return [
                Tool(
                    name="search_docs",
                    description="Search through documentation using semantic similarity matching. "
                                "Returns relevant document chunks with context.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "query": {
                                "type": "string",
                                "description": "Search query - can be natural language description of what you're looking for"
                            },
                            "max_results": {
                                "type": "integer",
                                "description": "Maximum number of results to return (default: 10)",
                                "default": 10
                            },
                            "repo": {
                                "type": "string",
                                "description": "Search within specific repository (optional)"
                            },
                            "min_similarity": {
                                "type": "number",
                                "description": "Minimum similarity threshold (default: 0.1)",
                                "default": 0.1
                            }
                        },
                        "required": ["query"]
                    }
                ),
                Tool(
                    name="list_repos",
                    description="List all indexed repositories",
                    inputSchema={
                        "type": "object",
                        "properties": {}
                    }
                ),
                Tool(
                    name="get_stats",
                    description="Get indexing statistics (repositories, documents, chunks)",
                    inputSchema={
                        "type": "object",
                        "properties": {}
                    }
                ),
                Tool(
                    name="get_document",
                    description="Retrieve the content of a specific document by path. Supports pagination to avoid overwhelming responses.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "path": {
                                "type": "string",
                                "description": "Relative path to the document from repos directory"
                            },
                            "max_lines": {
                                "type": "integer",
                                "description": "Maximum number of lines to return (default: 100)",
                                "default": 100
                            },
                            "offset": {
                                "type": "integer",
                                "description": "Line number to start from (1-based, default: 1)",
                                "default": 1
                            }
                        },
                        "required": ["path"]
                    }
                ),
            ]

        @self.server.call_tool()
        async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]:
            """Handle tool calls."""
            # Top-level boundary: convert any failure into an error message
            # rather than crashing the MCP session.
            try:
                if name == "search_docs":
                    return await self._search_docs(arguments)
                elif name == "list_repos":
                    return await self._list_repos(arguments)
                elif name == "get_stats":
                    return await self._get_stats(arguments)
                elif name == "get_document":
                    return await self._get_document(arguments)
                else:
                    raise ValueError(f"Unknown tool: {name}")
            except Exception as e:
                return [TextContent(type="text", text=f"Error: {str(e)}")]

    async def _search_docs(self, args: Dict[str, Any]) -> List[TextContent]:
        """Search documentation and return formatted results."""
        query = args.get("query", "")
        max_results = args.get("max_results", 10)
        repo = args.get("repo")
        min_similarity = args.get("min_similarity", 0.1)

        if not query:
            return [TextContent(type="text", text="Error: Query is required")]

        # Perform search
        # NOTE(review): min_similarity is only honored on the global search
        # path; search_by_repo does not receive it — confirm this is intended.
        if repo:
            results = self.searcher.search_by_repo(query, repo, max_results)
        else:
            results = self.searcher.search(query, max_results, min_similarity)

        if not results:
            search_scope = f" in repository '{repo}'" if repo else ""
            return [TextContent(type="text", text=f"No relevant documents found for query: '{query}'{search_scope}")]

        # Format results
        search_scope = f" in {repo}" if repo else ""
        response = f"**Search:** {query}{search_scope} ({len(results)} results)\n\n"

        for i, result in enumerate(results, 1):
            chunk = result.chunk
            response += f"**{i}. {chunk.title}** ({chunk.repo_name})\n"
            response += f"   File: `{chunk.file_path}`\n"
            if chunk.section_header:
                response += f"   Section: {chunk.section_header}\n"
            response += f"   Lines: {chunk.line_start}-{chunk.line_end}\n"
            response += f"   Similarity: {result.similarity:.3f}\n"

            # Show content preview, truncated to keep responses compact.
            content = chunk.content
            if len(content) > 400:
                content = content[:400] + "..."
            response += f"   \n{content}\n\n"

        return [TextContent(type="text", text=response)]

    async def _list_repos(self, args: Dict[str, Any]) -> List[TextContent]:
        """List all indexed repositories."""
        repos = self.searcher.list_repositories()

        if not repos:
            return [TextContent(type="text", text="No repositories indexed yet.")]

        response = f"**Indexed Repositories ({len(repos)}):**\n\n"
        for i, repo in enumerate(repos, 1):
            response += f"{i}. {repo}\n"

        return [TextContent(type="text", text=response)]

    async def _get_stats(self, args: Dict[str, Any]) -> List[TextContent]:
        """Get indexing statistics."""
        stats = self.searcher.get_stats()

        response = "**Documentation Search Statistics:**\n\n"
        response += f"- Repositories: {stats['repositories']}\n"
        response += f"- Documents: {stats['documents']}\n"
        response += f"- Chunks: {stats['chunks']}\n"

        return [TextContent(type="text", text=response)]

    async def _get_document(self, args: Dict[str, Any]) -> List[TextContent]:
        """Retrieve document content with optional pagination."""
        path = args.get("path", "")
        max_lines = args.get("max_lines", 100)
        offset = args.get("offset", 1)

        if not path:
            return [TextContent(type="text", text="Error: Document path is required")]

        try:
            # Security: `path` is an untrusted tool argument. Resolve it and
            # require it to stay inside the repos directory so traversal
            # segments ("../../...") cannot read arbitrary files.
            repos_root = self.searcher.repos_path.resolve()
            full_path = (self.searcher.repos_path / path).resolve()
            try:
                full_path.relative_to(repos_root)
            except ValueError:
                return [TextContent(type="text", text=f"Error: Document not found: {path}")]

            with open(full_path, 'r', encoding='utf-8') as f:
                all_lines = f.readlines()

            total_lines = len(all_lines)
            start_idx = max(0, offset - 1)  # Convert to 0-based index
            end_idx = min(total_lines, start_idx + max_lines)

            # Get the requested lines
            selected_lines = all_lines[start_idx:end_idx]
            content = ''.join(selected_lines)

            # Build response with pagination info
            response = f"# Document: {path}\n"
            response += f"**Lines {offset}-{start_idx + len(selected_lines)} of {total_lines}**\n\n"

            if start_idx > 0:
                response += "*(Use offset=1 to see from the beginning)*\n"
            if end_idx < total_lines:
                next_offset = end_idx + 1
                response += f"*(Use offset={next_offset} to see more lines)*\n"

            # Bug fix: the file's last line may lack a trailing newline, which
            # would fuse the closing fence onto the content and break markdown.
            if content and not content.endswith('\n'):
                content += '\n'
            response += f"\n```markdown\n{content}```"

            return [TextContent(type="text", text=response)]

        except FileNotFoundError:
            return [TextContent(type="text", text=f"Error: Document not found: {path}")]
        except Exception as e:
            return [TextContent(type="text", text=f"Error reading document: {str(e)}")]

    async def run(self):
        """Run the MCP server."""
        async with stdio_server() as (read_stream, write_stream):
            await self.server.run(
                read_stream,
                write_stream,
                self.server.create_initialization_options()
            )


async def main():
    """Parse CLI arguments and run the documentation MCP server."""
    parser = argparse.ArgumentParser(description="Documentation MCP Server")
    parser.add_argument("--kb-path", default="/Users/thypon/kb",
                        help="Path to knowledge base")
    parser.add_argument("--docs-db-path", default="embeddings_docs.db",
                        help="Path to docs embeddings database")
    parser.add_argument("--model", default="all-MiniLM-L6-v2",
                        help="Sentence transformer model name")

    args = parser.parse_args()

    server = DocumentationMCPServer(args.kb_path, args.docs_db_path, args.model)
    await server.run()


if __name__ == "__main__":
    asyncio.run(main())

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/thypon/kb'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.