#!/usr/bin/env python3
"""
MCP Server Main Module
Author: Yobie Benjamin
Version: 0.9
Date: August 1, 2025
This module implements the main MCP server that handles all protocol communications.
It manages tools, prompts, resources, and completions through the Model Context Protocol.
The server uses stdio transport for communication with Claude Desktop and implements
the full MCP specification including:
- Tool discovery and execution
- Prompt templates
- Resource management
- Model completions via Ollama
Usage:
python -m llama4_maverick_mcp.server
or
llama4-mcp (if installed)
"""
import asyncio
import json
import logging
import sys
from typing import Any, Dict, List, Optional

from mcp.server import Server, NotificationOptions
from mcp.server.models import InitializationOptions
from mcp.server.stdio import stdio_server
from mcp.types import (
    Tool,
    Prompt,
    Resource,
    TextContent,
    ImageContent,
    EmbeddedResource,
)

from .config import Config
from .llama_service import LlamaService
from .tools import ToolManager
from .prompts import PromptManager
from .resources import ResourceManager
from .utils.logger import setup_logger

# Initialize logger
logger = setup_logger(__name__)
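# NOTE: Logging calls in this module pass structured keyword context
# (e.g. version=..., tool=..., error=...) alongside the message. This assumes
# setup_logger returns a structlog-style logger; a plain logging.Logger would
# reject those keyword arguments (they would have to go through `extra=`).
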
class MCPServer:
"""
Main MCP Server class that orchestrates all server functionality.
This class manages:
- Server initialization and lifecycle
- Request routing and handling
- Service coordination (Llama, Tools, Prompts, Resources)
- Error handling and logging
"""

    def __init__(self, config: Optional[Config] = None):
        """
        Initialize the MCP server with configuration.

        Args:
            config: Configuration object (uses defaults if not provided)
        """
        self.config = config or Config()
        self.logger = logger

        # Log server initialization
        self.logger.info(
            "Initializing Llama 4 Maverick MCP Server",
            version="0.9",
            author="Yobie Benjamin"
        )

        # Initialize services
        self.llama_service = LlamaService(self.config)
        self.tool_manager = ToolManager(self.config)
        self.prompt_manager = PromptManager(self.config)
        self.resource_manager = ResourceManager(self.config)

        # Create MCP server instance
        self.server = Server(
            name="llama4-maverick-mcp-python",
            version="0.9.0"
        )

        # Register handlers
        self._register_handlers()

    def _register_handlers(self):
        """
        Register all MCP protocol handlers.

        This method sets up handlers for:
        - Tool listing and execution
        - Prompt listing and retrieval
        - Resource listing and reading
        - Completion requests
        """
        # Tool handlers
        @self.server.list_tools()
        async def handle_list_tools() -> List[Tool]:
            """
            Handle tool listing requests.
            Returns all available tools with their schemas.
            """
            self.logger.debug("Listing available tools")
            tools = await self.tool_manager.get_tools()
            self.logger.info(f"Returning {len(tools)} tools")
            return tools

        @self.server.call_tool()
        async def handle_call_tool(name: str, arguments: Optional[Dict[str, Any]] = None) -> List[TextContent]:
            """
            Handle tool execution requests.

            Args:
                name: Name of the tool to execute
                arguments: Tool arguments

            Returns:
                List containing tool execution results
            """
            self.logger.info(f"Executing tool: {name}", arguments=arguments)
            try:
                result = await self.tool_manager.execute_tool(name, arguments or {})
                return [TextContent(
                    type="text",
                    text=json.dumps(result, indent=2)
                )]
            except Exception as e:
                self.logger.error(f"Tool execution failed: {e}", tool=name, error=str(e))
                return [TextContent(
                    type="text",
                    text=json.dumps({
                        "error": str(e),
                        "tool": name,
                        "status": "failed"
                    }, indent=2)
                )]

        # Prompt handlers
        @self.server.list_prompts()
        async def handle_list_prompts() -> List[Prompt]:
            """
            Handle prompt listing requests.
            Returns all available prompt templates.
            """
            self.logger.debug("Listing available prompts")
            prompts = await self.prompt_manager.get_prompts()
            self.logger.info(f"Returning {len(prompts)} prompts")
            return prompts

        @self.server.get_prompt()
        async def handle_get_prompt(name: str, arguments: Optional[Dict[str, Any]] = None) -> Prompt:
            """
            Handle prompt retrieval requests.

            Args:
                name: Name of the prompt template
                arguments: Variables to fill in the template

            Returns:
                Filled prompt template
            """
            self.logger.info(f"Getting prompt: {name}", arguments=arguments)
            try:
                prompt = await self.prompt_manager.get_prompt(name, arguments or {})
                return prompt
            except Exception as e:
                self.logger.error(f"Failed to get prompt: {e}", prompt=name, error=str(e))
                raise

        # Resource handlers
        @self.server.list_resources()
        async def handle_list_resources() -> List[Resource]:
            """
            Handle resource listing requests.
            Returns all available resources.
            """
            self.logger.debug("Listing available resources")
            resources = await self.resource_manager.get_resources()
            self.logger.info(f"Returning {len(resources)} resources")
            return resources

        @self.server.read_resource()
        async def handle_read_resource(uri: str) -> str:
            """
            Handle resource reading requests.

            Args:
                uri: URI of the resource to read

            Returns:
                Resource content as string
            """
            self.logger.info(f"Reading resource: {uri}")
            try:
                content = await self.resource_manager.read_resource(uri)
                return content
            except Exception as e:
                self.logger.error(f"Failed to read resource: {e}", uri=uri, error=str(e))
                raise

        # Completion handler
        @self.server.complete()
        async def handle_complete(
            prompt: Optional[str] = None,
            messages: Optional[List[Dict[str, str]]] = None,
            max_tokens: Optional[int] = None,
            temperature: Optional[float] = None,
            model: Optional[str] = None,
            **kwargs
        ) -> str:
            """
            Handle completion requests using the Llama model.

            Args:
                prompt: Single prompt string
                messages: List of chat messages
                max_tokens: Maximum tokens to generate
                temperature: Sampling temperature
                model: Model to use (overrides default)
                **kwargs: Additional model parameters

            Returns:
                Generated completion text
            """
            self.logger.info(
                "Processing completion request",
                has_prompt=bool(prompt),
                message_count=len(messages) if messages else 0,
                model=model or self.config.llama_model_name
            )
            try:
                # Use either prompt or messages
                if messages:
                    completion = await self.llama_service.complete_chat(
                        messages=messages,
                        max_tokens=max_tokens,
                        temperature=temperature,
                        model=model,
                        **kwargs
                    )
                else:
                    completion = await self.llama_service.complete(
                        prompt=prompt or "Hello",
                        max_tokens=max_tokens,
                        temperature=temperature,
                        model=model,
                        **kwargs
                    )
                self.logger.info("Completion successful", length=len(completion))
                return completion
            except Exception as e:
                self.logger.error(f"Completion failed: {e}", error=str(e))
                raise

    async def initialize(self):
        """
        Initialize all server components.

        This method:
        1. Initializes the Llama service and checks model availability
        2. Loads all tools, prompts, and resources
        3. Performs warm-up operations
        """
        self.logger.info("Starting server initialization...")
        try:
            # Initialize services in parallel for faster startup
            await asyncio.gather(
                self.llama_service.initialize(),
                self.tool_manager.initialize(),
                self.prompt_manager.initialize(),
                self.resource_manager.initialize(),
            )
            self.logger.info(
                "Server initialization complete",
                tools=len(await self.tool_manager.get_tools()),
                prompts=len(await self.prompt_manager.get_prompts()),
                resources=len(await self.resource_manager.get_resources()),
                model=self.config.llama_model_name
            )
        except Exception as e:
            self.logger.error(f"Failed to initialize server: {e}", error=str(e))
            raise

    async def run(self):
        """
        Run the MCP server.

        This method starts the stdio server and handles the main event loop.
        The server will run until interrupted or an error occurs.
        """
        self.logger.info("Starting MCP server...")
        try:
            # Initialize server components
            await self.initialize()

            # Configure initialization options
            # (get_capabilities in the MCP Python SDK expects notification
            # options and experimental capabilities)
            init_options = InitializationOptions(
                server_name="llama4-maverick-mcp-python",
                server_version="0.9.0",
                capabilities=self.server.get_capabilities(
                    notification_options=NotificationOptions(),
                    experimental_capabilities={},
                )
            )

            # Run the stdio server
            self.logger.info(
                "MCP server running",
                transport="stdio",
                capabilities=["tools", "prompts", "resources", "completion"]
            )
            async with stdio_server() as (read_stream, write_stream):
                await self.server.run(
                    read_stream,
                    write_stream,
                    init_options,
                    raise_exceptions=self.config.debug_mode
                )
        except KeyboardInterrupt:
            self.logger.info("Server shutdown requested")
        except Exception as e:
            self.logger.error(f"Server error: {e}", error=str(e), exc_info=True)
            raise
        finally:
            await self.cleanup()

    async def cleanup(self):
        """
        Clean up server resources.

        This method ensures proper cleanup of:
        - Model connections
        - Open files
        - Network connections
        - Temporary resources
        """
        self.logger.info("Cleaning up server resources...")
        try:
            await asyncio.gather(
                self.llama_service.cleanup(),
                self.tool_manager.cleanup(),
                self.resource_manager.cleanup(),
                return_exceptions=True
            )
            self.logger.info("Server cleanup complete")
        except Exception as e:
            self.logger.error(f"Error during cleanup: {e}", error=str(e))

def main():
    """
    Main entry point for the MCP server.

    This function:
    1. Sets up the async event loop
    2. Creates and runs the MCP server
    3. Handles graceful shutdown
    """
    # Configure asyncio for better performance
    if sys.platform != "win32":
        try:
            import uvloop
            asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
            logger.info("Using uvloop for better performance")
        except ImportError:
            logger.debug("uvloop not available, using default event loop")

    # Create and run server
    try:
        config = Config()
        server = MCPServer(config)

        # Run the server
        asyncio.run(server.run())
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
        sys.exit(0)
    except Exception as e:
        logger.error(f"Fatal error: {e}", error=str(e), exc_info=True)
        sys.exit(1)
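
# Illustrative programmatic usage (a sketch, not part of the CLI entry point;
# MCPServer and Config are the classes defined and imported above):
#
#   async def embed_server():
#       server = MCPServer(Config())
#       await server.run()  # serves over stdio until the transport closes
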
if __name__ == "__main__":
    main()