Portable MCP Toolkit

Overview Schema Related Servers Score Discussions

AIStack-MCP
python_agent

mcp_production_server.py•18 KiB

# python_agent/mcp_production_server.py """ Production MCP Server - Hybrid Architecture (November 2024 Standards) Architecture Pattern: - Local heavy lifting (Ollama + Qdrant) = FREE - Compressed context to Claude = CHEAP - Remote only for final generation = MINIMAL COST References: - Anthropic MCP Best Practices (Nov 2024) - Qdrant MCP Server (Official) - MCP-Ollama Integration Patterns - OWASP MCP Security Guidelines Stack: - FastMCP: MCP server framework - Ollama: Local LLM (Qwen3, phi4) - Qdrant: Vector database - Your existing agents: Graph analysis, impact analysis """ from fastmcp import FastMCP from typing import Optional, List, Dict, Any import asyncio import json import os from pathlib import Path # Try to import existing infrastructure, use placeholders if not available try: from tools.code_rag_tools import search_code as qdrant_search, upsert_code_snippets from tools.rag_tools import upsert_document from agents.impact_analysis_agent import ImpactAnalysisAgent from agents.call_graph_agent import CallGraphAgent from models.request import AgentRequest from models.response import AgentResponse from config.settings import settings except ImportError: # Placeholder implementations for development print("Warning: Some dependencies not found. Using placeholder implementations.") from config.settings import Settings settings = Settings() # Placeholder functions def qdrant_search(query: str, top_k: int = 5) -> List[Dict]: """Placeholder for Qdrant search""" return [] def upsert_code_snippets(snippets: List[Dict]) -> bool: """Placeholder for code snippet upsert""" return True def upsert_document(content: str, metadata: Dict) -> bool: """Placeholder for document upsert""" return True class AgentRequest: def __init__(self, message: str, context: Dict = None): self.message = message self.context = context or {} class AgentResponse: def __init__(self, success: bool, response: str): self.success = success self.response = response class ImpactAnalysisAgent: async def run(self, request: AgentRequest) -> AgentResponse: return AgentResponse(True, "Placeholder impact analysis") # Local LLM via Ollama try: from langchain_ollama import ChatOllama except ImportError: print("Warning: langchain_ollama not found. Install with: pip install langchain-ollama") ChatOllama = None from loguru import logger # Initialize MCP server mcp = FastMCP("AIStack Intelligence - Production") # Local LLM for heavy analysis (FREE) local_llm = None code_llm = None if ChatOllama: try: local_llm = ChatOllama( model="qwen2.5:8b", # Fast, good quality (updated from qwen3) base_url=os.getenv("OLLAMA_URL", "http://localhost:11434"), temperature=0.3, num_predict=512 # Limit output for speed ) # Alternative: phi4 for code generation code_llm = ChatOllama( model="phi4:14b", base_url=os.getenv("OLLAMA_URL", "http://localhost:11434"), temperature=0.2 ) logger.info("Ollama LLMs initialized successfully") except Exception as e: logger.warning(f"Failed to initialize Ollama LLMs: {e}") logger.warning("Some tools will have limited functionality") # ============================================================================ # SEMANTIC CODE SEARCH (Qdrant Vector Store) # Pattern: Local vector search, compressed results # Token savings: 90% (500 tokens vs 5000 for reading files) # ============================================================================ @mcp.tool() async def search_code_semantic( query: str, max_results: int = 5, min_score: float = 0.7 ) -> str: """ Semantic code search using LOCAL Qdrant vector database. Industry Pattern (Nov 2024): - Vector search happens locally (FREE, instant) - Returns compressed snippets (not full files) - 90% token reduction vs reading files directly Args: query: Natural language search query max_results: Number of results to return (default: 5) min_score: Minimum similarity score (0-1, default: 0.7) Returns: Compressed search results (~500 tokens) Example: "search_code_semantic('error handling patterns', max_results=5)" Reference: Qdrant MCP Server (Official), Dec 2024 """ try: logger.info(f"Semantic search: '{query}' (max_results={max_results})") # Use local Qdrant (FREE) hits = qdrant_search(query, top_k=max_results) # Filter by score hits = [h for h in hits if h.get("score", 0) >= min_score] if not hits: return f"No results found for '{query}' with score >= {min_score}" # Build compressed response result = f"Found {len(hits)} semantic matches for '{query}':\n\n" for i, hit in enumerate(hits, 1): metadata = hit.get("metadata", {}) score = hit.get("score", 0) text = hit.get("text", "")[:200] # Snippet only result += f"{i}. {metadata.get('path', 'unknown')} (score: {score:.2f})\n" result += f" {text.strip()}...\n\n" logger.info(f"Returned {len(hits)} results (~{len(result)} chars)") return result except Exception as e: logger.error(f"Semantic search failed: {e}") return f"Error: {str(e)}" # ============================================================================ # PATTERN ANALYSIS (Local LLM Processing) # Pattern: Ollama analyzes code, returns compressed summary # Token savings: 95% (200 tokens vs 4000 for full context) # ============================================================================ @mcp.tool() async def analyze_code_patterns( pattern_type: str, max_examples: int = 3 ) -> str: """ Analyze codebase patterns using LOCAL Ollama LLM. Industry Pattern (Nov 2024): - Local LLM reads and analyzes code (FREE) - Vector search finds relevant examples (FREE) - Returns compressed summary (200 tokens vs 4000) Args: pattern_type: Type of pattern to analyze (e.g., 'error_handling', 'async', 'dependency_injection') max_examples: Maximum code examples to include (default: 3) Returns: Compressed pattern summary (~200 tokens) Example: "analyze_code_patterns('error_handling', max_examples=3)" Reference: MCP-Ollama Integration (GitHub, May 2024) """ try: logger.info(f"Analyzing pattern: {pattern_type}") # Step 1: Find examples via local Qdrant (FREE) search_query = f"{pattern_type} patterns in code" examples = qdrant_search(search_query, top_k=10) if not examples: return f"No examples found for pattern type: {pattern_type}" # Step 2: Analyze with local LLM (Qwen2.5 - FREE) if not local_llm: # Fallback: return basic summary result = f"Pattern: {pattern_type}\n\n" result += f"Found {len(examples)} examples:\n" for i, ex in enumerate(examples[:max_examples], 1): result += f"{i}. {ex.get('metadata', {}).get('path', 'unknown')}\n" return result examples_text = "\n\n".join([ f"File: {ex.get('metadata', {}).get('path', 'unknown')}\n{ex.get('text', '')[:500]}" for ex in examples[:max_examples] ]) prompt = f"""Analyze this codebase pattern and provide a concise summary. Pattern Type: {pattern_type} Code Examples: {examples_text} Provide a summary (max 200 words) with: 1. Pattern name and key characteristics 2. 1-2 short code examples 3. When to use this pattern 4. Common pitfalls Be concise - this summary will be passed to another LLM.""" response = await local_llm.ainvoke(prompt) summary = response.content[:800] # Limit to ~200 tokens logger.info(f"Pattern analysis complete (~{len(summary)} chars)") return summary except Exception as e: logger.error(f"Pattern analysis failed: {e}") return f"Error: {str(e)}" # ============================================================================ # IMPACT ANALYSIS (Local Graph Processing) # Pattern: Local agents do heavy analysis, return compressed report # Token savings: 85% (300 tokens vs 2000 for full report) # ============================================================================ @mcp.tool() async def analyze_change_impact( target: str, change_description: str, detail_level: str = "summary" ) -> str: """ Multi-layer impact analysis using LOCAL agents. Industry Pattern (Nov 2024): - Call graph analysis (local) - File dependency analysis (local) - Semantic impact via Qdrant (local) - Compressed summary via Ollama (local) Args: target: Code element to analyze (e.g., 'TaskOrchestrator.HandleAsync') change_description: Description of proposed change detail_level: 'summary' (default) or 'detailed' Returns: Compressed impact analysis (~300 tokens for summary, ~800 for detailed) Example: "analyze_change_impact('TaskOrchestrator.HandleAsync', 'Add async error handling', detail_level='summary')" Reference: Your existing ImpactAnalysisAgent (proven in production) """ try: logger.info(f"Impact analysis: {target}") # Use your existing local agent (FREE) agent = ImpactAnalysisAgent() request = AgentRequest( message=change_description, context={"target": target} ) result = await agent.run(request) if not result.success: return f"Impact analysis failed: {result.response}" # Compress if needed if detail_level == "summary" and len(result.response) > 1000: if not local_llm: # Fallback: truncate return result.response[:1000] + "..." prompt = f"""Compress this impact analysis to 250 words: {result.response[:2500]} Focus on: - Affected files/methods count - Risk level (1-10 scale) - Key dependencies - Critical concerns only""" compressed = await local_llm.ainvoke(prompt) return compressed.content[:1000] # ~300 tokens logger.info(f"Impact analysis complete") return result.response if detail_level == "detailed" else result.response[:1000] except Exception as e: logger.error(f"Impact analysis failed: {e}") return f"Error: {str(e)}" # ============================================================================ # OPTIMIZED CONTEXT PREPARATION # Pattern: Local analysis produces minimal context for remote generation # Token savings: 85% (400 tokens vs 2700 for full file) # ============================================================================ @mcp.tool() async def get_code_context( file_path: str, task_description: str, include_patterns: bool = True ) -> str: """ Prepare optimized context for code generation. Industry Pattern (Nov 2024): - Read target file (local) - Find relevant patterns (local Qdrant) - Extract key sections (local LLM) - Return compressed context (400 tokens vs 2700) Args: file_path: Path to file needing changes task_description: What needs to be done include_patterns: Whether to include related patterns (default: True) Returns: Optimized context (~400 tokens vs 2700 for full file) Example: "get_code_context('src/agents/rag_agent.py', 'Add error handling', include_patterns=True)" Reference: Anthropic MCP Best Practices § Context Optimization """ try: logger.info(f"Preparing context for: {file_path}") # Step 1: Read target file (local) try: full_content = Path(file_path).read_text() except Exception as e: return f"Error reading {file_path}: {e}" # Step 2: Find relevant patterns if requested patterns = "" if include_patterns: pattern_type = task_description.split()[0].lower() # First word patterns = await analyze_code_patterns(pattern_type, max_examples=2) # Step 3: Extract relevant sections via local LLM if not local_llm: # Fallback: return first 1200 chars return f"File: {file_path}\nTask: {task_description}\n\n{full_content[:1200]}" prompt = f"""Extract minimal context for this task. File: {file_path} Task: {task_description} Relevant Patterns: {patterns} Full File Content (truncated): {full_content[:2000]} Provide context (max 300 words): 1. Current structure (brief) 2. Key patterns to follow 3. Important dependencies 4. Constraints/considerations Be concise - this goes to a code generation LLM.""" context = await local_llm.ainvoke(prompt) result = context.content[:1200] # ~400 tokens logger.info(f"Context prepared (~{len(result)} chars vs {len(full_content)} original)") return result except Exception as e: logger.error(f"Context preparation failed: {e}") return f"Error: {str(e)}" # ============================================================================ # CODE GENERATION (Local phi4 for patches) # Pattern: Local LLM generates code, optional remote validation # Token savings: 100% local generation (only remote if validation requested) # ============================================================================ @mcp.tool() async def generate_code_patch( file_path: str, change_description: str, validate_remote: bool = False ) -> str: """ Generate code patch using LOCAL phi4 LLM. Industry Pattern (Nov 2024): - phi4:14b generates high-quality code locally (FREE) - Optional remote validation for critical changes - Patch format for easy application Args: file_path: File to modify change_description: What changes to make validate_remote: Whether to validate with Claude (costs tokens) Returns: Unified diff patch Example: "generate_code_patch('src/agents/rag_agent.py', 'Add timeout handling', validate_remote=False)" Reference: Your existing PatchGeneratorAgent + phi4 """ try: logger.info(f"Generating patch for: {file_path}") # Get optimized context (local) context = await get_code_context(file_path, change_description) if not code_llm: return f"Error: Code LLM (phi4) not available. Please ensure Ollama is running with phi4:14b model." # Generate patch with local phi4 (FREE) prompt = f"""Generate a unified diff patch for this change. Context: {context} Change: {change_description} Return ONLY the unified diff patch, no explanations.""" response = await code_llm.ainvoke(prompt) patch = response.content if validate_remote: # Optional: Validate with Claude (costs tokens) # This would call Cursor's LLM via MCP logger.info("Remote validation requested (not implemented yet)") logger.info(f"Patch generated (~{len(patch)} chars)") return patch except Exception as e: logger.error(f"Patch generation failed: {e}") return f"Error: {str(e)}" # ============================================================================ # DOCUMENTATION SEARCH (RAG over your docs) # Pattern: Local vector search over documentation # ============================================================================ @mcp.tool() async def search_documentation( query: str, max_results: int = 5 ) -> str: """ Search documentation using RAG (Qdrant). Searches your ingested documentation and returns relevant passages. Args: query: Search query max_results: Number of results (default: 5) Returns: Relevant documentation passages Example: "search_documentation('LangGraph checkpointing', max_results=5)" """ # Uses same infrastructure as code search return await search_code_semantic(query, max_results) # ============================================================================ # FALLBACK: Direct file access (use sparingly - expensive) # ============================================================================ @mcp.tool() async def read_file_full( file_path: str ) -> str: """ Read full file content (USE SPARINGLY - sends many tokens to Claude). Only use when: - Optimized context insufficient - User explicitly requests full file - Verification after generation Args: file_path: Path to file Returns: Full file content (WARNING: Can be 2000+ tokens) """ try: content = Path(file_path).read_text() logger.warning(f"Full file read: {file_path} ({len(content)} chars - expensive!)") return content except Exception as e: return f"Error: {str(e)}" # ============================================================================ # SERVER STARTUP # ============================================================================ if __name__ == "__main__": logger.info("Starting AIStack Intelligence MCP Server (Production)") logger.info(f"Ollama: {os.getenv('OLLAMA_URL', 'http://localhost:11434')}") logger.info(f"Qdrant: {os.getenv('QDRANT_URL', 'http://localhost:6333')}") logger.info("Tools: 8 (semantic search, pattern analysis, impact analysis, etc.)") mcp.run()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mjdevaccount/AIStack-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

mcp_production_server.py•18 KiB