/**
* Llama Service Module
*
* Author: Yobie Benjamin
* Version: 0.2
* Date: July 28, 2025
*
* Integrates a local Llama model via Ollama (targeting Llama Maverick when available).
* Provides AI capabilities for the hub including routing decisions and synthesis.
*/
import { Ollama } from 'ollama';
import winston from 'winston';
import { ConfigManager } from '../config/config-manager.js';
const logger = winston.createLogger({
level: 'debug',
format: winston.format.simple()
});
/**
* Llama completion options
* Controls generation parameters
*/
interface CompletionOptions {
temperature?: number;
maxTokens?: number;
topP?: number;
topK?: number;
stopSequences?: string[];
systemPrompt?: string;
}
/**
* Llama model configuration
* Defines model settings and capabilities
*/
interface LlamaConfig {
model: string;
baseUrl: string;
defaultOptions: CompletionOptions;
contextWindow: number;
capabilities: {
functionCalling: boolean;
jsonMode: boolean;
streaming: boolean;
};
}
/**
* Service that manages Llama AI model interactions
* Provides intelligent capabilities to the hub
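*
* @example
* // Hypothetical setup sketch; assumes an existing ConfigManager instance named `configManager`
* const llama = new LlamaService(configManager);
* await llama.initialize();
* const reply = await llama.complete('Which service handles payments?');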
*/
export class LlamaService {
private ollama: Ollama;
private config: LlamaConfig;
private configManager: ConfigManager;
private conversationHistory: Array<{ role: string; content: string }>;
constructor(configManager: ConfigManager) {
/**
* Initialize configuration
* Load Llama settings from config manager
*/
this.configManager = configManager;
this.config = {
model: 'llama3.2', // Will upgrade to llama4-maverick when available
baseUrl: process.env.OLLAMA_BASE_URL || 'http://localhost:11434',
defaultOptions: {
temperature: 0.7,
maxTokens: 2048,
topP: 0.9,
systemPrompt: this.getSystemPrompt()
},
contextWindow: 8192,
capabilities: {
functionCalling: true,
jsonMode: true,
streaming: true
}
};
/**
* Initialize Ollama client
* Connect to local Ollama instance
*/
this.ollama = new Ollama({
host: this.config.baseUrl
});
/**
* Initialize conversation history
* Maintains context across interactions
*/
this.conversationHistory = [];
}
/**
* Initialize the Llama service
* Verify model availability and warm up
*/
async initialize(): Promise<void> {
logger.info('Initializing Llama service...');
try {
/**
* Check if model is available
* Ensure Ollama has the required model
*/
const models = await this.ollama.list();
const modelAvailable = models.models.some(
m => m.name.includes(this.config.model)
);
if (!modelAvailable) {
logger.warn(`Model ${this.config.model} not found, attempting to pull...`);
await this.ollama.pull({ model: this.config.model });
}
/**
* Warm up the model
* Initial generation to load model into memory
*/
await this.warmUp();
logger.info('Llama service initialized successfully');
} catch (error) {
logger.error('Failed to initialize Llama service:', error);
throw error;
}
}
/**
* Get the system prompt for Llama
* Defines the AI's role and capabilities
*/
private getSystemPrompt(): string {
return `You are Llama Maverick, an advanced AI orchestrator managing multiple MCP (Model Context Protocol) services.
Your responsibilities:
1. Route requests to the appropriate MCP service based on capability matching
2. Orchestrate multi-service workflows for complex tasks
3. Synthesize results from multiple services into coherent responses
4. Make intelligent decisions about service selection and fallback strategies
5. Provide helpful explanations of multi-service operations
Available capabilities:
- Service discovery and health monitoring
- Intelligent request routing
- Multi-service workflow execution
- Parallel service coordination
- Result aggregation and synthesis
You have access to various MCP services including Stripe for payments, databases for storage, and other specialized services.
Always respond with clear, actionable information and explain your routing decisions when relevant.`;
}
/**
* Warm up the model with initial generation
* Ensures model is loaded and ready
*/
private async warmUp(): Promise<void> {
logger.debug('Warming up Llama model...');
try {
await this.ollama.generate({
model: this.config.model,
prompt: 'Hello',
stream: false
});
logger.debug('Model warm-up complete');
} catch (error) {
logger.warn('Model warm-up failed:', error);
}
}
/**
* Generate a completion from Llama
* Core AI generation capability
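*
* @example
* // Hypothetical usage; assumes `llama` is an initialized LlamaService instance
* const reply = await llama.complete('Which MCP service should handle a refund?', {
*   temperature: 0.2,
*   maxTokens: 512
* });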
*/
async complete(
prompt: string,
options?: CompletionOptions
): Promise<string> {
const mergedOptions = {
...this.config.defaultOptions,
...options
};
logger.debug('Generating completion', {
prompt: prompt.substring(0, 100),
options: mergedOptions
});
try {
/**
* Build messages for conversation
* Include system prompt and history
*/
const messages = [
{
role: 'system',
content: mergedOptions.systemPrompt || this.config.defaultOptions.systemPrompt!
},
...this.conversationHistory,
{
role: 'user',
content: prompt
}
];
/**
* Generate completion using Ollama
* Get AI response for the prompt
*/
const response = await this.ollama.chat({
model: this.config.model,
messages,
options: {
temperature: mergedOptions.temperature,
num_predict: mergedOptions.maxTokens,
num_ctx: this.config.contextWindow, // Request the configured context window from Ollama
top_p: mergedOptions.topP,
top_k: mergedOptions.topK,
stop: mergedOptions.stopSequences
},
stream: false
});
/**
* Update conversation history
* Maintain context for follow-up interactions
*/
this.updateHistory('user', prompt);
this.updateHistory('assistant', response.message.content);
return response.message.content;
} catch (error) {
logger.error('Completion generation failed:', error);
throw error;
}
}
/**
* Generate completion with additional context
* Enriches AI responses with service-specific information
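*
* @example
* // Hypothetical usage; the context object shape is up to the caller
* const answer = await llama.completeWithContext(
*   'Summarize the health of connected services',
*   { services: ['stripe', 'postgres'], healthy: 2 }
* );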
*/
async completeWithContext(
prompt: string,
context: Record<string, any>,
options?: CompletionOptions
): Promise<string> {
/**
* Prepare enhanced prompt with context
* Include relevant service information
*/
const enhancedPrompt = `${prompt}
Available Context:
${JSON.stringify(context, null, 2)}
Please use the above context to provide an informed response.`;
return this.complete(enhancedPrompt, options);
}
/**
* Generate structured JSON output
* Useful for routing decisions and structured data
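*
* @example
* // Hypothetical usage; the schema is advisory and is passed verbatim into the prompt
* const decision = await llama.generateJson<{ action: string; target: string }>(
*   'Decide the next action for this request',
*   { action: 'string', target: 'string' }
* );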
*/
async generateJson<T = any>(
prompt: string,
schema?: any,
options?: CompletionOptions
): Promise<T> {
/**
* Enhance prompt for JSON generation
* Request structured output format
*/
const jsonPrompt = `${prompt}
IMPORTANT: Respond ONLY with valid JSON that matches this schema:
${schema ? JSON.stringify(schema, null, 2) : 'any valid JSON'}
Do not include any explanation or text outside the JSON.`;
const response = await this.complete(jsonPrompt, {
...options,
temperature: 0.3 // Lower temperature for structured output
});
try {
/**
* Parse JSON response
* Extract structured data from AI output
*/
// Find JSON in response (handle potential markdown code blocks)
const jsonMatch = response.match(/```json\n?([\s\S]*?)\n?```/) ||
response.match(/({[\s\S]*})/);
const jsonStr = jsonMatch ? jsonMatch[1] : response;
return JSON.parse(jsonStr);
} catch (error) {
logger.error('Failed to parse JSON response:', error);
logger.debug('Raw response:', response);
throw new Error('Failed to generate valid JSON');
}
}
/**
* Analyze a request to determine routing
* Uses AI to decide which service should handle a request
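*
* @example
* // Hypothetical usage; service names are illustrative
* const route = await llama.analyzeRouting(
*   'Create a $50 payment link for a customer',
*   ['stripe', 'database', 'email']
* );
* // e.g. route.service === 'stripe' with a high confidence score (illustrative output)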
*/
async analyzeRouting(
request: string,
availableServices: string[]
): Promise<{
service: string;
confidence: number;
reasoning: string;
}> {
const prompt = `Analyze this request and determine which service should handle it:
Request: ${request}
Available Services: ${availableServices.join(', ')}
Provide your analysis as JSON with this structure:
{
"service": "selected_service_name",
"confidence": 0.0-1.0,
"reasoning": "explanation of why this service was chosen"
}`;
return this.generateJson(prompt);
}
/**
* Plan a multi-service workflow
* Uses AI to design complex multi-step operations
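*
* @example
* // Hypothetical usage; service and tool names are illustrative
* const plan = await llama.planWorkflow(
*   'Charge a customer and record the invoice',
*   ['stripe', 'database'],
*   ['create_payment', 'insert_record']
* );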
*/
async planWorkflow(
goal: string,
availableServices: string[],
availableTools: string[]
): Promise<{
steps: Array<{
id: string;
service: string;
tool: string;
description: string;
dependencies: string[];
}>;
explanation: string;
}> {
const prompt = `Plan a workflow to achieve this goal:
Goal: ${goal}
Available Services: ${availableServices.join(', ')}
Available Tools: ${availableTools.join(', ')}
Design a step-by-step workflow using the available services and tools.
Provide your plan as JSON with this structure:
{
"steps": [
{
"id": "step_1",
"service": "service_name",
"tool": "tool_name",
"description": "what this step does",
"dependencies": ["ids of prerequisite steps"]
}
],
"explanation": "overall workflow explanation"
}`;
return this.generateJson(prompt);
}
/**
* Synthesize results from multiple services
* Combines and summarizes multi-source data
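*
* @example
* // Hypothetical usage; keys are the service names that produced each result
* const summary = await llama.synthesizeResults(
*   { stripe: { paymentId: 'pi_123' }, database: { rows: 1 } },
*   'Charge the customer and log the transaction'
* );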
*/
async synthesizeResults(
results: Record<string, any>,
originalRequest: string
): Promise<string> {
const prompt = `Synthesize these results from multiple services into a coherent response:
Original Request: ${originalRequest}
Service Results:
${JSON.stringify(results, null, 2)}
Provide a clear, comprehensive summary that addresses the original request using all available information.`;
return this.complete(prompt, {
temperature: 0.5 // Balanced temperature for synthesis
});
}
/**
* Handle errors with AI assistance
* Provides intelligent error analysis and recovery suggestions
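*
* @example
* // Hypothetical usage; the context contents are up to the caller
* const report = await llama.analyzeError(
*   new Error('ECONNREFUSED 127.0.0.1:11434'),
*   { service: 'ollama', operation: 'chat' }
* );
* if (report.canRetry) {
*   // retry the failed operation
* }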
*/
async analyzeError(
error: Error,
context: Record<string, any>
): Promise<{
analysis: string;
suggestions: string[];
canRetry: boolean;
}> {
const prompt = `Analyze this error and provide recovery suggestions:
Error: ${error.message}
Error Type: ${error.name}
Context: ${JSON.stringify(context, null, 2)}
Provide analysis as JSON:
{
"analysis": "explanation of what went wrong",
"suggestions": ["list", "of", "recovery", "suggestions"],
"canRetry": true/false
}`;
return this.generateJson(prompt);
}
/**
* Update conversation history
* Maintains context across interactions
*/
private updateHistory(role: string, content: string): void {
this.conversationHistory.push({ role, content });
/**
* Trim history when it exceeds the maximum message count
* Keeps only the most recent interactions (a fixed cap, not a token-based limit)
*/
const maxHistorySize = 10;
if (this.conversationHistory.length > maxHistorySize) {
this.conversationHistory = this.conversationHistory.slice(-maxHistorySize);
}
}
/**
* Clear conversation history
* Reset context for new conversation
*/
clearHistory(): void {
this.conversationHistory = [];
logger.debug('Conversation history cleared');
}
/**
* Get model information
* Returns current model configuration
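*
* @example
* const info = llama.getModelInfo();
* // e.g. { model: 'llama3.2', contextWindow: 8192, capabilities: { ... } } (illustrative)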
*/
getModelInfo(): {
model: string;
contextWindow: number;
capabilities: any;
} {
return {
model: this.config.model,
contextWindow: this.config.contextWindow,
capabilities: this.config.capabilities
};
}
/**
* Stream a completion
* Provides real-time generation for long responses
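*
* @example
* // Hypothetical usage; assumes `llama` is an initialized LlamaService instance
* for await (const token of llama.streamComplete('Explain the planned workflow')) {
*   process.stdout.write(token);
* }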
*/
async *streamComplete(
prompt: string,
options?: CompletionOptions
): AsyncGenerator<string, void, unknown> {
const mergedOptions = {
...this.config.defaultOptions,
...options
};
try {
/**
* Stream generation from Ollama
* Yield tokens as they're generated
*/
const response = await this.ollama.chat({
model: this.config.model,
messages: [
{
role: 'system',
content: mergedOptions.systemPrompt || this.config.defaultOptions.systemPrompt!
},
// Include prior turns so streaming keeps the same conversational context as complete()
...this.conversationHistory,
{
role: 'user',
content: prompt
}
],
options: {
temperature: mergedOptions.temperature,
num_predict: mergedOptions.maxTokens,
num_ctx: this.config.contextWindow, // Match the configured context window, as in complete()
top_p: mergedOptions.topP
},
stream: true
});
let fullResponse = '';
for await (const part of response) {
if (part.message?.content) {
fullResponse += part.message.content;
yield part.message.content;
}
}
/**
* Update history with complete response
* Store full generation for context
*/
this.updateHistory('user', prompt);
this.updateHistory('assistant', fullResponse);
} catch (error) {
logger.error('Stream generation failed:', error);
throw error;
}
}
}