#!/usr/bin/env python3
import asyncio
import logging
import json
from typing import Any, Sequence
from mcp.server import Server
try:
import httpx
except ImportError:
httpx = None
from mcp.types import (
CallToolRequest,
CallToolResult,
ListToolsRequest,
ListToolsResult,
Tool,
TextContent,
)
from mcp.server.stdio import stdio_server
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Create server instance
server = Server("llm-integration-server")
@server.list_tools()
async def list_tools() -> list[Tool]:
    """Return the catalog of tools this server exposes to LLM clients."""
    # JSON schema for the llm_predict tool's arguments.
    predict_schema = {
        "type": "object",
        "properties": {
            "prompt": {
                "type": "string",
                "description": "The text prompt to send to the LLM",
            },
            "max_tokens": {
                "type": "integer",
                "description": "Maximum number of tokens to generate",
                "default": 100,
            },
        },
        "required": ["prompt"],
    }
    # JSON schema for the echo tool's arguments.
    echo_schema = {
        "type": "object",
        "properties": {
            "text": {
                "type": "string",
                "description": "Text to echo back",
            },
        },
        "required": ["text"],
    }
    return [
        Tool(
            name="llm_predict",
            description="Process text input through a local LLM",
            inputSchema=predict_schema,
        ),
        Tool(
            name="echo",
            description="Echo back the input text for testing",
            inputSchema=echo_schema,
        ),
    ]
@server.call_tool()
async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
    """Handle a tool invocation from the MCP client.

    Args:
        name: Tool name; one of "llm_predict" or "echo".
        arguments: Arguments matching the tool's declared inputSchema.

    Returns:
        A single-element list of TextContent holding the tool's output.

    Raises:
        ValueError: If *name* does not match a known tool.
    """
    # Lazy %-style args: the message is only formatted if INFO is enabled.
    logger.info("Tool called: %s with arguments: %s", name, arguments)
    if name == "llm_predict":
        prompt = arguments.get("prompt", "")
        max_tokens = arguments.get("max_tokens", 100)
        # Delegate to the Ollama-backed helper defined below.
        response = await perform_llm_inference(prompt, max_tokens)
        return [
            TextContent(
                type="text",
                text=f"LLM Response: {response}",
            )
        ]
    elif name == "echo":
        text = arguments.get("text", "")
        return [
            TextContent(
                type="text",
                text=f"Echo: {text}",
            )
        ]
    else:
        raise ValueError(f"Unknown tool: {name}")
async def perform_llm_inference(
    prompt: str,
    max_tokens: int = 100,
    model: str = "llama3.2",
) -> str:
    """Generate a completion for *prompt* via a local Ollama instance.

    Make sure Ollama is running first: ``ollama serve``.

    Args:
        prompt: Text prompt to complete.
        max_tokens: Maximum number of tokens to generate.
        model: Ollama model name to use (defaults to "llama3.2").

    Returns:
        The generated text on success, or a human-readable error string
        on any failure (missing dependency, connection error, bad status).
    """
    # Guard the optional dependency OUTSIDE the try block: if httpx were
    # None, the `except httpx.ConnectError` clause below would itself
    # raise AttributeError while handling an exception.
    if httpx is None:
        return "Error: httpx not installed. Run: pip install httpx"
    # Ollama's non-streaming generate endpoint.
    ollama_url = "http://localhost:11434/api/generate"
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,  # request one complete JSON response, not a token stream
        "options": {
            "num_predict": max_tokens
        }
    }
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(ollama_url, json=payload)
            if response.status_code == 200:
                result = response.json()
                return result.get("response", "No response generated")
            else:
                return f"Error: Ollama request failed with status {response.status_code}"
    except httpx.ConnectError:
        return "Error: Could not connect to Ollama. Make sure Ollama is running (ollama serve)"
    except Exception as e:
        # Lazy %-style args keep formatting off the hot path.
        logger.error("LLM inference error: %s", e)
        return f"Error during LLM inference: {str(e)}"
async def main() -> None:
    """Launch the MCP server over stdio and serve until the client disconnects."""
    logger.info("Starting MCP LLM Integration Server...")
    # stdio transport: the client drives the server through stdin/stdout.
    async with stdio_server() as streams:
        reader, writer = streams
        init_options = server.create_initialization_options()
        await server.run(reader, writer, init_options)
# Entry point: run the asyncio event loop until the server shuts down.
if __name__ == "__main__":
    asyncio.run(main())