# mcp_server.py (11.3 kB)
#!/usr/bin/env python3
"""
MCP Server for embeddings-based documentation search.
Provides tools for Cursor/Claude to query implementation documentation.
"""
import asyncio
from typing import Any, Dict, List
import argparse
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import (
Resource,
Tool,
TextContent
)
# Import our documentation searcher
from embeddings_searcher import DocumentationSearcher
class DocumentationMCPServer:
    """MCP server exposing semantic documentation search over stdio.

    Wraps a :class:`DocumentationSearcher` and registers MCP resource and
    tool handlers so clients (Cursor/Claude) can run semantic searches over
    indexed markdown documentation, list repositories, fetch statistics,
    and read individual documents with pagination.
    """

    def __init__(self, kb_path: str, docs_db_path: str = "embeddings_docs.db",
                 model_name: str = "all-MiniLM-L6-v2"):
        """Create the server and register all protocol handlers.

        Args:
            kb_path: Path to the knowledge base root handed to the searcher.
            docs_db_path: SQLite database holding the document embeddings.
            model_name: Sentence-transformer model used for query embedding.
        """
        self.server = Server("documentation-searcher")
        self.searcher = DocumentationSearcher(kb_path, docs_db_path, model_name)
        self.setup_handlers()

    def setup_handlers(self):
        """Register MCP resource/tool handlers on the wrapped Server.

        Handlers are defined as closures so they can capture ``self`` while
        matching the decorator-based registration API of the MCP SDK.
        """

        @self.server.list_resources()
        async def list_resources() -> List[Resource]:
            """List available documentation resources."""
            return [
                Resource(
                    uri="docs://search",
                    name="Documentation Search",
                    description="Search through markdown documentation using semantic similarity",
                    mimeType="text/plain"
                )
            ]

        @self.server.read_resource()
        async def read_resource(uri: str) -> str:
            """Read resource content; only docs://search is defined."""
            if uri == "docs://search":
                return "Use the search_docs tool to query documentation with semantic search."
            raise ValueError(f"Unknown resource: {uri}")

        @self.server.list_tools()
        async def list_tools() -> List[Tool]:
            """Advertise the four tools with their JSON-schema inputs."""
            return [
                Tool(
                    name="search_docs",
                    description="Search through documentation using semantic similarity matching. Returns relevant document chunks with context.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "query": {
                                "type": "string",
                                "description": "Search query - can be natural language description of what you're looking for"
                            },
                            "max_results": {
                                "type": "integer",
                                "description": "Maximum number of results to return (default: 10)",
                                "default": 10
                            },
                            "repo": {
                                "type": "string",
                                "description": "Search within specific repository (optional)"
                            },
                            "min_similarity": {
                                "type": "number",
                                "description": "Minimum similarity threshold (default: 0.1)",
                                "default": 0.1
                            }
                        },
                        "required": ["query"]
                    }
                ),
                Tool(
                    name="list_repos",
                    description="List all indexed repositories",
                    inputSchema={
                        "type": "object",
                        "properties": {}
                    }
                ),
                Tool(
                    name="get_stats",
                    description="Get indexing statistics (repositories, documents, chunks)",
                    inputSchema={
                        "type": "object",
                        "properties": {}
                    }
                ),
                Tool(
                    name="get_document",
                    description="Retrieve the content of a specific document by path. Supports pagination to avoid overwhelming responses.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "path": {
                                "type": "string",
                                "description": "Relative path to the document from repos directory"
                            },
                            "max_lines": {
                                "type": "integer",
                                "description": "Maximum number of lines to return (default: 100)",
                                "default": 100
                            },
                            "offset": {
                                "type": "integer",
                                "description": "Line number to start from (1-based, default: 1)",
                                "default": 1
                            }
                        },
                        "required": ["path"]
                    }
                ),
            ]

        @self.server.call_tool()
        async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]:
            """Dispatch a tool call by name; errors become text responses.

            MCP clients expect a result, not an exception, so any failure
            is reported as a text payload rather than propagated.
            """
            try:
                if name == "search_docs":
                    return await self._search_docs(arguments)
                elif name == "list_repos":
                    return await self._list_repos(arguments)
                elif name == "get_stats":
                    return await self._get_stats(arguments)
                elif name == "get_document":
                    return await self._get_document(arguments)
                else:
                    raise ValueError(f"Unknown tool: {name}")
            except Exception as e:
                return [TextContent(type="text", text=f"Error: {str(e)}")]

    async def _search_docs(self, args: Dict[str, Any]) -> List[TextContent]:
        """Search documentation and return formatted results.

        Args:
            args: Tool arguments; ``query`` is required, ``max_results``,
                ``repo`` and ``min_similarity`` are optional.

        Returns:
            A single TextContent containing markdown-formatted results, or
            an error/empty-result message.
        """
        query = args.get("query", "")
        max_results = args.get("max_results", 10)
        repo = args.get("repo")
        min_similarity = args.get("min_similarity", 0.1)
        if not query:
            return [TextContent(type="text", text="Error: Query is required")]
        # Perform search.
        # NOTE(review): search_by_repo is not passed min_similarity, so the
        # threshold only applies to global searches — confirm against the
        # DocumentationSearcher API whether that is intentional.
        if repo:
            results = self.searcher.search_by_repo(query, repo, max_results)
        else:
            results = self.searcher.search(query, max_results, min_similarity)
        if not results:
            search_scope = f" in repository '{repo}'" if repo else ""
            return [TextContent(type="text", text=f"No relevant documents found for query: '{query}'{search_scope}")]
        # Format results as a single markdown payload.
        search_scope = f" in {repo}" if repo else ""
        response = f"**Search:** {query}{search_scope} ({len(results)} results)\n\n"
        for i, result in enumerate(results, 1):
            chunk = result.chunk
            response += f"**{i}. {chunk.title}** ({chunk.repo_name})\n"
            response += f"   File: `{chunk.file_path}`\n"
            if chunk.section_header:
                response += f"   Section: {chunk.section_header}\n"
            response += f"   Lines: {chunk.line_start}-{chunk.line_end}\n"
            response += f"   Similarity: {result.similarity:.3f}\n"
            # Truncate long chunks so responses stay readable.
            content = chunk.content
            if len(content) > 400:
                content = content[:400] + "..."
            response += f"   \n{content}\n\n"
        return [TextContent(type="text", text=response)]

    async def _list_repos(self, args: Dict[str, Any]) -> List[TextContent]:
        """List all indexed repositories as a numbered markdown list."""
        repos = self.searcher.list_repositories()
        if not repos:
            return [TextContent(type="text", text="No repositories indexed yet.")]
        response = f"**Indexed Repositories ({len(repos)}):**\n\n"
        for i, repo in enumerate(repos, 1):
            response += f"{i}. {repo}\n"
        return [TextContent(type="text", text=response)]

    async def _get_stats(self, args: Dict[str, Any]) -> List[TextContent]:
        """Report indexing statistics (repositories, documents, chunks)."""
        stats = self.searcher.get_stats()
        response = "**Documentation Search Statistics:**\n\n"
        response += f"- Repositories: {stats['repositories']}\n"
        response += f"- Documents: {stats['documents']}\n"
        response += f"- Chunks: {stats['chunks']}\n"
        return [TextContent(type="text", text=response)]

    async def _get_document(self, args: Dict[str, Any]) -> List[TextContent]:
        """Retrieve document content with optional pagination.

        Args:
            args: Tool arguments; ``path`` (relative to the repos directory)
                is required, ``max_lines`` and ``offset`` are optional.

        Returns:
            A single TextContent with a pagination header and the requested
            lines fenced as markdown, or an error message.
        """
        path = args.get("path", "")
        max_lines = args.get("max_lines", 100)
        offset = args.get("offset", 1)
        if not path:
            return [TextContent(type="text", text="Error: Document path is required")]
        try:
            # Security: `path` is untrusted tool input. Resolve it and verify
            # it stays inside the repos directory, otherwise a "../"-laden
            # path could read arbitrary files on the host.
            base = self.searcher.repos_path.resolve()
            full_path = (base / path).resolve()
            try:
                full_path.relative_to(base)
            except ValueError:
                return [TextContent(type="text", text=f"Error: Invalid document path: {path}")]
            with open(full_path, 'r', encoding='utf-8') as f:
                all_lines = f.readlines()
            total_lines = len(all_lines)
            start_idx = max(0, offset - 1)  # Convert 1-based offset to 0-based index
            end_idx = min(total_lines, start_idx + max_lines)
            # Get the requested lines
            selected_lines = all_lines[start_idx:end_idx]
            content = ''.join(selected_lines)
            # Build response with pagination info; report the clamped start
            # line (start_idx + 1), which differs from `offset` when the
            # caller passed offset <= 0.
            response = f"# Document: {path}\n"
            response += f"**Lines {start_idx + 1}-{end_idx} of {total_lines}**\n\n"
            if start_idx > 0:
                response += "*(Use offset=1 to see from the beginning)*\n"
            if end_idx < total_lines:
                next_offset = end_idx + 1
                response += f"*(Use offset={next_offset} to see more lines)*\n"
            # Ensure the closing fence sits on its own line even when the
            # last selected line has no trailing newline.
            if content and not content.endswith("\n"):
                content += "\n"
            response += f"\n```markdown\n{content}```"
            return [TextContent(type="text", text=response)]
        except FileNotFoundError:
            return [TextContent(type="text", text=f"Error: Document not found: {path}")]
        except Exception as e:
            return [TextContent(type="text", text=f"Error reading document: {str(e)}")]

    async def run(self):
        """Run the MCP server over stdio until the stream closes."""
        async with stdio_server() as (read_stream, write_stream):
            await self.server.run(
                read_stream,
                write_stream,
                self.server.create_initialization_options()
            )
async def main():
    """Parse command-line options and launch the documentation MCP server."""
    parser = argparse.ArgumentParser(description="Documentation MCP Server")
    parser.add_argument(
        "--kb-path", default="/Users/thypon/kb", help="Path to knowledge base"
    )
    parser.add_argument(
        "--docs-db-path",
        default="embeddings_docs.db",
        help="Path to docs embeddings database",
    )
    parser.add_argument(
        "--model",
        default="all-MiniLM-L6-v2",
        help="Sentence transformer model name",
    )
    opts = parser.parse_args()
    await DocumentationMCPServer(opts.kb_path, opts.docs_db_path, opts.model).run()
if __name__ == "__main__":
    # Script entry point: start the asyncio event loop and run the server.
    asyncio.run(main())