
Llama 4 Maverick MCP Server

by YobieBen
server.py • 13.5 kB
#!/usr/bin/env python3
"""
MCP Server Main Module

Author: Yobie Benjamin
Version: 0.9
Date: August 1, 2025

This module implements the main MCP server that handles all protocol
communications. It manages tools, prompts, resources, and completions
through the Model Context Protocol.

The server uses stdio transport for communication with Claude Desktop
and implements the full MCP specification including:
- Tool discovery and execution
- Prompt templates
- Resource management
- Model completions via Ollama

Usage:
    python -m llama4_maverick_mcp.server
    or
    llama4-mcp (if installed)
"""

import asyncio
import json
import logging
import sys
from typing import Any, Dict, List, Optional

from mcp.server import Server, NotificationOptions
from mcp.server.models import InitializationOptions
from mcp.server.stdio import stdio_server
from mcp.types import (
    Tool,
    Prompt,
    Resource,
    TextContent,
    ImageContent,
    EmbeddedResource,
)

from .config import Config
from .llama_service import LlamaService
from .tools import ToolManager
from .prompts import PromptManager
from .resources import ResourceManager
from .utils.logger import setup_logger

# Initialize logger
logger = setup_logger(__name__)


class MCPServer:
    """
    Main MCP Server class that orchestrates all server functionality.

    This class manages:
    - Server initialization and lifecycle
    - Request routing and handling
    - Service coordination (Llama, Tools, Prompts, Resources)
    - Error handling and logging
    """

    def __init__(self, config: Optional[Config] = None):
        """
        Initialize the MCP server with configuration.

        Args:
            config: Configuration object (uses defaults if not provided)
        """
        self.config = config or Config()
        self.logger = logger

        # Log server initialization
        self.logger.info(
            "Initializing Llama 4 Maverick MCP Server",
            version="0.9",
            author="Yobie Benjamin"
        )

        # Initialize services
        self.llama_service = LlamaService(self.config)
        self.tool_manager = ToolManager(self.config)
        self.prompt_manager = PromptManager(self.config)
        self.resource_manager = ResourceManager(self.config)

        # Create MCP server instance
        self.server = Server(
            name="llama4-maverick-mcp-python",
            version="0.9.0"
        )

        # Register handlers
        self._register_handlers()

    def _register_handlers(self):
        """
        Register all MCP protocol handlers.

        This method sets up handlers for:
        - Tool listing and execution
        - Prompt listing and retrieval
        - Resource listing and reading
        - Completion requests
        """
        # Tool handlers
        @self.server.list_tools()
        async def handle_list_tools() -> List[Tool]:
            """
            Handle tool listing requests.

            Returns all available tools with their schemas.
            """
            self.logger.debug("Listing available tools")
            tools = await self.tool_manager.get_tools()
            self.logger.info(f"Returning {len(tools)} tools")
            return tools

        @self.server.call_tool()
        async def handle_call_tool(
            name: str,
            arguments: Optional[Dict[str, Any]] = None
        ) -> List[TextContent]:
            """
            Handle tool execution requests.

            Args:
                name: Name of the tool to execute
                arguments: Tool arguments

            Returns:
                List containing tool execution results
            """
            self.logger.info(f"Executing tool: {name}", arguments=arguments)

            try:
                result = await self.tool_manager.execute_tool(name, arguments or {})
                return [TextContent(
                    type="text",
                    text=json.dumps(result, indent=2)
                )]
            except Exception as e:
                self.logger.error(f"Tool execution failed: {e}", tool=name, error=str(e))
                return [TextContent(
                    type="text",
                    text=json.dumps({
                        "error": str(e),
                        "tool": name,
                        "status": "failed"
                    }, indent=2)
                )]

        # Prompt handlers
        @self.server.list_prompts()
        async def handle_list_prompts() -> List[Prompt]:
            """
            Handle prompt listing requests.

            Returns all available prompt templates.
            """
            self.logger.debug("Listing available prompts")
            prompts = await self.prompt_manager.get_prompts()
            self.logger.info(f"Returning {len(prompts)} prompts")
            return prompts

        @self.server.get_prompt()
        async def handle_get_prompt(
            name: str,
            arguments: Optional[Dict[str, Any]] = None
        ) -> Prompt:
            """
            Handle prompt retrieval requests.

            Args:
                name: Name of the prompt template
                arguments: Variables to fill in the template

            Returns:
                Filled prompt template
            """
            self.logger.info(f"Getting prompt: {name}", arguments=arguments)

            try:
                prompt = await self.prompt_manager.get_prompt(name, arguments or {})
                return prompt
            except Exception as e:
                self.logger.error(f"Failed to get prompt: {e}", prompt=name, error=str(e))
                raise

        # Resource handlers
        @self.server.list_resources()
        async def handle_list_resources() -> List[Resource]:
            """
            Handle resource listing requests.

            Returns all available resources.
            """
            self.logger.debug("Listing available resources")
            resources = await self.resource_manager.get_resources()
            self.logger.info(f"Returning {len(resources)} resources")
            return resources

        @self.server.read_resource()
        async def handle_read_resource(uri: str) -> str:
            """
            Handle resource reading requests.

            Args:
                uri: URI of the resource to read

            Returns:
                Resource content as string
            """
            self.logger.info(f"Reading resource: {uri}")

            try:
                content = await self.resource_manager.read_resource(uri)
                return content
            except Exception as e:
                self.logger.error(f"Failed to read resource: {e}", uri=uri, error=str(e))
                raise

        # Completion handler
        @self.server.complete()
        async def handle_complete(
            prompt: Optional[str] = None,
            messages: Optional[List[Dict[str, str]]] = None,
            max_tokens: Optional[int] = None,
            temperature: Optional[float] = None,
            model: Optional[str] = None,
            **kwargs
        ) -> str:
            """
            Handle completion requests using Llama model.

            Args:
                prompt: Single prompt string
                messages: List of chat messages
                max_tokens: Maximum tokens to generate
                temperature: Sampling temperature
                model: Model to use (overrides default)
                **kwargs: Additional model parameters

            Returns:
                Generated completion text
            """
            self.logger.info(
                "Processing completion request",
                has_prompt=bool(prompt),
                message_count=len(messages) if messages else 0,
                model=model or self.config.llama_model_name
            )

            try:
                # Use either prompt or messages
                if messages:
                    completion = await self.llama_service.complete_chat(
                        messages=messages,
                        max_tokens=max_tokens,
                        temperature=temperature,
                        model=model,
                        **kwargs
                    )
                else:
                    completion = await self.llama_service.complete(
                        prompt=prompt or "Hello",
                        max_tokens=max_tokens,
                        temperature=temperature,
                        model=model,
                        **kwargs
                    )

                self.logger.info("Completion successful", length=len(completion))
                return completion
            except Exception as e:
                self.logger.error(f"Completion failed: {e}", error=str(e))
                raise

    async def initialize(self):
        """
        Initialize all server components.

        This method:
        1. Initializes the Llama service and checks model availability
        2. Loads all tools, prompts, and resources
        3. Performs warm-up operations
        """
        self.logger.info("Starting server initialization...")

        try:
            # Initialize services in parallel for faster startup
            await asyncio.gather(
                self.llama_service.initialize(),
                self.tool_manager.initialize(),
                self.prompt_manager.initialize(),
                self.resource_manager.initialize(),
            )

            self.logger.info(
                "Server initialization complete",
                tools=len(await self.tool_manager.get_tools()),
                prompts=len(await self.prompt_manager.get_prompts()),
                resources=len(await self.resource_manager.get_resources()),
                model=self.config.llama_model_name
            )
        except Exception as e:
            self.logger.error(f"Failed to initialize server: {e}", error=str(e))
            raise

    async def run(self):
        """
        Run the MCP server.

        This method starts the stdio server and handles the main event loop.
        The server will run until interrupted or an error occurs.
        """
        self.logger.info("Starting MCP server...")

        try:
            # Initialize server components
            await self.initialize()

            # Configure initialization options
            init_options = InitializationOptions(
                server_name="llama4-maverick-mcp-python",
                server_version="0.9.0",
                capabilities=self.server.get_capabilities()
            )

            # Run the stdio server
            self.logger.info(
                "MCP server running",
                transport="stdio",
                capabilities=["tools", "prompts", "resources", "completion"]
            )

            async with stdio_server() as (read_stream, write_stream):
                await self.server.run(
                    read_stream,
                    write_stream,
                    init_options,
                    raise_exceptions=self.config.debug_mode
                )
        except KeyboardInterrupt:
            self.logger.info("Server shutdown requested")
        except Exception as e:
            self.logger.error(f"Server error: {e}", error=str(e), exc_info=True)
            raise
        finally:
            await self.cleanup()

    async def cleanup(self):
        """
        Clean up server resources.

        This method ensures proper cleanup of:
        - Model connections
        - Open files
        - Network connections
        - Temporary resources
        """
        self.logger.info("Cleaning up server resources...")

        try:
            await asyncio.gather(
                self.llama_service.cleanup(),
                self.tool_manager.cleanup(),
                self.resource_manager.cleanup(),
                return_exceptions=True
            )
            self.logger.info("Server cleanup complete")
        except Exception as e:
            self.logger.error(f"Error during cleanup: {e}", error=str(e))


def main():
    """
    Main entry point for the MCP server.

    This function:
    1. Sets up the async event loop
    2. Creates and runs the MCP server
    3. Handles graceful shutdown
    """
    # Configure asyncio for better performance
    if sys.platform != "win32":
        try:
            import uvloop
            asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
            logger.info("Using uvloop for better performance")
        except ImportError:
            logger.debug("uvloop not available, using default event loop")

    # Create and run server
    try:
        config = Config()
        server = MCPServer(config)

        # Run the server
        asyncio.run(server.run())
    except Exception as e:
        logger.error(f"Fatal error: {e}", error=str(e), exc_info=True)
        sys.exit(1)
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
        sys.exit(0)


if __name__ == "__main__":
    main()
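
Since the server speaks MCP over stdio, it can be exercised from any MCP client. The following is a minimal client sketch using the MCP Python SDK (ClientSession, stdio_client, StdioServerParameters); the exact client API can vary between SDK versions, so treat it as illustrative rather than as part of this repository.

import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main() -> None:
    # Launch the server the same way the module docstring suggests.
    params = StdioServerParameters(
        command="python",
        args=["-m", "llama4_maverick_mcp.server"],
    )
    async with stdio_client(params) as (read_stream, write_stream):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()
            # List the tools the server registered via its ToolManager.
            tools = await session.list_tools()
            print([tool.name for tool in tools.tools])


if __name__ == "__main__":
    asyncio.run(main())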

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/YobieBen/llama4-maverick-mcp-python'
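
The same lookup can be scripted; below is a small Python sketch of the request using only the standard library. The response schema is not documented on this page, so the sketch simply prints whatever JSON the endpoint returns.

import json
import urllib.request

URL = "https://glama.ai/api/mcp/v1/servers/YobieBen/llama4-maverick-mcp-python"

with urllib.request.urlopen(URL) as response:
    data = json.load(response)

# Pretty-print the raw server record returned by the directory API.
print(json.dumps(data, indent=2))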

If you have feedback or need assistance with the MCP directory API, please join our Discord server.