Skip to main content
Glama

Dedalus MCP Documentation Server

by kitan23
main.py (17.8 kB)
""" Dedalus Documentation MCP Server A Model Context Protocol server for serving and querying documentation """ import os import sys import hashlib from pathlib import Path from typing import Optional, Dict, List, Any from datetime import datetime from collections import defaultdict import time from dotenv import load_dotenv from mcp.server.fastmcp import FastMCP # Load environment variables - try multiple locations env_path = Path('.') / '.env.local' if env_path.exists(): load_dotenv(env_path) load_dotenv() # Also load .env if exists # Get port from environment or command line args port = int(os.environ.get('PORT', '8080')) for i, arg in enumerate(sys.argv): if arg == '--port' and i + 1 < len(sys.argv): port = int(sys.argv[i + 1]) break # Get host from environment or command line args host = os.environ.get('HOST', '0.0.0.0') for i, arg in enumerate(sys.argv): if arg == '--host' and i + 1 < len(sys.argv): host = sys.argv[i + 1] break mcp = FastMCP( name='Documentation Server', host=host, port=port, instructions="""This MCP server provides access to documentation files with AI-powered search and Q&A capabilities. 
Available tools: - list_docs(): List all documentation files - search_docs(query): Search documentation with keywords - ask_docs(question): Get AI-powered answers from documentation - index_docs(): Index documents for better search - analyze_docs(task): Analyze documentation for specific tasks Resources: - docs://{path}: Access any markdown documentation file directly This server includes rate limiting (10 requests/minute) to protect API keys.""", ) # Configuration # Check for docs in multiple locations (for Dedalus deployment compatibility) possible_docs_dirs = [ Path(os.getenv('DOCS_DIR', './docs')), # Environment variable Path('/app/docs'), # Dedalus container path Path('./docs'), # Local path ] DOCS_DIR = None for dir_path in possible_docs_dirs: if dir_path.exists(): DOCS_DIR = dir_path break # If no docs dir exists, use a fallback that should exist if DOCS_DIR is None: # Try to create in /tmp as a last resort (always writable) DOCS_DIR = Path('/tmp/docs') try: DOCS_DIR.mkdir(parents=True, exist_ok=True) except (OSError, PermissionError): # If even /tmp fails, just use current directory DOCS_DIR = Path('.') except Exception: # Catch any other unexpected errors DOCS_DIR = Path('.') EMBEDDINGS_CACHE = {} METADATA_CACHE = {} # Rate limiting for API protection class RateLimiter: """Simple rate limiter to protect API keys from abuse""" def __init__(self, max_requests: int = 10, window_seconds: int = 60): self.max_requests = max_requests self.window_seconds = window_seconds self.requests = defaultdict(list) def is_allowed(self, identifier: str) -> bool: """Check if request is allowed for this identifier""" now = time.time() # Clean old requests outside window self.requests[identifier] = [ req_time for req_time in self.requests[identifier] if now - req_time < self.window_seconds ] # Check if under limit if len(self.requests[identifier]) < self.max_requests: self.requests[identifier].append(now) return True return False def get_reset_time(self, identifier: str) -> int: 
"""Get seconds until rate limit resets""" if not self.requests[identifier]: return 0 oldest = min(self.requests[identifier]) return max(0, int(self.window_seconds - (time.time() - oldest))) # Initialize rate limiter (10 requests per minute) rate_limiter = RateLimiter(max_requests=10, window_seconds=60) def get_doc_metadata(file_path: Path) -> Dict[str, Any]: """Extract metadata from markdown files""" if file_path in METADATA_CACHE: return METADATA_CACHE[file_path] metadata = { 'title': file_path.stem.replace('-', ' ').title(), 'path': str(file_path.relative_to(DOCS_DIR)), 'modified': datetime.fromtimestamp(file_path.stat().st_mtime).isoformat(), 'size': file_path.stat().st_size, 'hash': hashlib.md5(file_path.read_bytes()).hexdigest(), } # Try to extract title from first # heading try: content = file_path.read_text() lines = content.split('\n') for line in lines[:10]: # Check first 10 lines if line.startswith('# '): metadata['title'] = line[2:].strip() break except (OSError, UnicodeDecodeError): pass METADATA_CACHE[file_path] = metadata return metadata @mcp.resource('docs://{path}') def get_documentation(path: str) -> str: """ Serve markdown documentation files Args: path: Path to the documentation file relative to docs directory Returns: Content of the markdown file """ file_path = DOCS_DIR / path if not file_path.exists(): raise ValueError(f'Documentation file not found: {path}') if not file_path.is_file(): raise ValueError(f'Path is not a file: {path}') if file_path.suffix not in ['.md', '.markdown', '.txt']: raise ValueError(f'Unsupported file type: {file_path.suffix}') return file_path.read_text() @mcp.tool() def list_docs(directory: Optional[str] = None) -> List[Dict[str, Any]]: """ List all available documentation files Args: directory: Optional subdirectory to list (relative to docs root) Returns: List of document metadata """ search_dir = DOCS_DIR if directory: search_dir = DOCS_DIR / directory if not search_dir.exists(): return [] docs = [] for file_path 
in search_dir.rglob('*.md'): if file_path.is_file(): docs.append(get_doc_metadata(file_path)) return sorted(docs, key=lambda x: x['path']) @mcp.tool() def search_docs( query: str, max_results: int = 5, search_content: bool = True, search_titles: bool = True, ) -> List[Dict[str, Any]]: """ Search documentation using keyword matching (semantic search ready) Args: query: Search query string max_results: Maximum number of results to return search_content: Whether to search in document content search_titles: Whether to search in document titles Returns: List of matching documents with relevance scores """ query_lower = query.lower() results = [] for file_path in DOCS_DIR.rglob('*.md'): if not file_path.is_file(): continue score = 0 metadata = get_doc_metadata(file_path) # Title matching if search_titles and query_lower in metadata['title'].lower(): score += 10 # Content matching if search_content: try: content = file_path.read_text().lower() # Count occurrences occurrences = content.count(query_lower) if occurrences > 0: score += min(occurrences, 5) # Cap at 5 points for content # Find snippet around first occurrence idx = content.find(query_lower) start = max(0, idx - 100) end = min(len(content), idx + 100) snippet = content[start:end] if start > 0: snippet = '...' + snippet if end < len(content): snippet = snippet + '...' 
metadata['snippet'] = snippet except (OSError, UnicodeDecodeError): pass if score > 0: metadata['relevance_score'] = score results.append(metadata) # Sort by relevance score results.sort(key=lambda x: x['relevance_score'], reverse=True) return results[:max_results] @mcp.tool() def ask_docs( question: str, context_docs: Optional[List[str]] = None, max_context_length: int = 4000, user_id: Optional[str] = None, ) -> Dict[str, Any]: """ Answer questions about documentation using AI Args: question: The question to answer context_docs: Optional list of document paths to use as context max_context_length: Maximum characters of context to include user_id: Optional user identifier for rate limiting Returns: AI-generated answer with sources """ # Rate limiting check identifier = user_id or 'default' if not rate_limiter.is_allowed(identifier): reset_time = rate_limiter.get_reset_time(identifier) return { 'error': 'Rate limit exceeded', 'message': f'Too many requests. Please wait {reset_time} seconds before trying again.', 'reset_in_seconds': reset_time, 'limit': '10 requests per minute', } # If no context docs specified, search for relevant ones if not context_docs: search_results = search_docs(question, max_results=3) context_docs = [result['path'] for result in search_results] # Gather context from documents context_parts = [] sources = [] total_length = 0 for doc_path in context_docs: if total_length >= max_context_length: break try: file_path = DOCS_DIR / doc_path content = file_path.read_text() # Truncate if needed remaining = max_context_length - total_length if len(content) > remaining: content = content[:remaining] + '...' 
context_parts.append(f'--- {doc_path} ---\n{content}') sources.append(doc_path) total_length += len(content) except (OSError, UnicodeDecodeError): continue if not context_parts: return { 'answer': "I couldn't find relevant documentation to answer your question.", 'sources': [], 'confidence': 'low', } full_context = '\n\n'.join(context_parts) # Try to use OpenAI if API key is available api_key = os.getenv('OPENAI_API_KEY') if api_key: try: from openai import OpenAI client = OpenAI(api_key=api_key) response = client.chat.completions.create( model='gpt-4o-mini', messages=[ { 'role': 'system', 'content': 'You are a helpful assistant that answers questions based on provided documentation. Only use information from the provided context.', }, { 'role': 'user', 'content': f"""Based on the following documentation, please answer this question: {question} Documentation: {full_context} Please provide a clear, concise answer based only on the provided documentation.""", }, ], temperature=0.7, max_tokens=500, ) return { 'answer': response.choices[0].message.content, 'sources': sources, 'context_length': total_length, 'model': 'gpt-4o-mini', 'confidence': 'high', } except Exception as e: # Fall back to context-only response if OpenAI fails return { 'answer': f'Error using OpenAI: {str(e)}', 'context': full_context[:500] + '...' if len(full_context) > 500 else full_context, 'sources': sources, 'context_length': total_length, 'error': str(e), } # If no API key, return context for Dedalus deployment return { 'question': question, 'context': full_context[:500] + '...' if len(full_context) > 500 else full_context, 'sources': sources, 'context_length': total_length, 'note': "No API key found. 
When deployed to Dedalus, this will use the platform's LLM integration via BYOK", } @mcp.tool() def index_docs(rebuild: bool = False) -> Dict[str, Any]: """ Index or re-index all documentation for improved search Args: rebuild: Whether to rebuild the entire index from scratch Returns: Indexing statistics """ if rebuild: METADATA_CACHE.clear() EMBEDDINGS_CACHE.clear() stats = { 'files_indexed': 0, 'total_size': 0, 'errors': [], 'timestamp': datetime.now().isoformat(), } for file_path in DOCS_DIR.rglob('*.md'): try: if file_path.is_file(): metadata = get_doc_metadata(file_path) stats['files_indexed'] += 1 stats['total_size'] += metadata['size'] # Here you would generate embeddings for semantic search # EMBEDDINGS_CACHE[file_path] = generate_embeddings(content) except Exception as e: stats['errors'].append({'file': str(file_path), 'error': str(e)}) return stats @mcp.tool() def analyze_docs( task: str, docs: Optional[List[str]] = None, output_format: str = 'summary' ) -> Dict[str, Any]: """ Analyze documentation for specific tasks (foundation for agent handoffs) Args: task: Analysis task (e.g., "find_gaps", "generate_outline", "check_consistency") docs: Optional list of specific documents to analyze output_format: Output format (summary, detailed, structured) Returns: Analysis results ready for agent handoff """ available_tasks = [ 'find_gaps', 'generate_outline', 'check_consistency', 'extract_examples', 'identify_prerequisites', 'suggest_improvements', ] if task not in available_tasks: return { 'error': f'Unknown task. 
Available tasks: {", ".join(available_tasks)}', 'available_tasks': available_tasks, } # Gather documents to analyze if not docs: all_docs = list_docs() docs = [doc['path'] for doc in all_docs] # This is where different analysis agents would be invoked # Structure the response for easy handoff to specialized agents return { 'task': task, 'documents_analyzed': len(docs), 'output_format': output_format, 'results': { 'summary': f"Analysis task '{task}' ready for processing", 'documents': docs, 'next_steps': [ 'Connect specialized agent for this task', 'Process documents according to task requirements', 'Return structured results', ], }, 'agent_handoff_ready': True, 'suggested_model': 'gpt-4' if task in ['find_gaps', 'check_consistency'] else 'claude-3-5-sonnet', } @mcp.prompt() def documentation_query(topic: str, detail_level: str = 'medium') -> str: """ Generate a prompt for querying documentation Args: topic: The topic to query about detail_level: Level of detail (brief, medium, comprehensive) Returns: A formatted prompt for documentation queries """ prompts = { 'brief': f'Provide a brief summary of {topic} from the documentation.', 'medium': f'Explain {topic} with examples and key points from the documentation.', 'comprehensive': f'Provide a comprehensive explanation of {topic} including all details, examples, and related concepts from the documentation.', } return prompts.get(detail_level, prompts['medium']) def main(): """Main entry point for the MCP server""" import argparse # Parse command line arguments parser = argparse.ArgumentParser(description='Dedalus Documentation MCP Server') parser.add_argument('--port', type=int, help='Port for HTTP transport') parser.add_argument( '--host', type=str, default='0.0.0.0', help='Host for HTTP transport' ) parser.add_argument('--stdio', action='store_true', help='Force STDIO transport') parser.add_argument('--test', action='store_true', help='Test mode') args = parser.parse_args() # Ensure docs directory exists (but don't 
fail if we can't create it) try: DOCS_DIR.mkdir(exist_ok=True) except Exception: # If we can't create the directory, just continue # The server can still respond to queries even without docs pass # Check if running in test mode if args.test: # Test mode - just verify everything loads print('Documentation MCP Server loaded successfully') print(f'Docs directory: {DOCS_DIR.absolute()}') print( 'Tools available: list_docs, search_docs, ask_docs, index_docs, analyze_docs' ) print(f'Documents found: {len(list(DOCS_DIR.rglob("*.md")))}') return 0 # Determine transport mode # Note: port and host are already configured in the FastMCP instance above if (args.port or os.environ.get('PORT')) and not args.stdio: # HTTP transport mode actual_host = host if not args.host else args.host actual_port = port if not args.port else args.port print(f'Starting HTTP server on {actual_host}:{actual_port}') print(f'MCP endpoint: http://{actual_host}:{actual_port}/mcp') mcp.run(transport='streamable-http') else: # STDIO transport (default for MCP) mcp.run('stdio') return 0 if __name__ == '__main__': import sys sys.exit(main())

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/kitan23/Python_MCP_Server_Example_2'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.