Skip to main content
Glama

MaverickMCP

by wshobson
MIT License
165
  • Apple
llm_optimization_config.py (17.9 kB)
"""
LLM Optimization Configuration for Research Agents.

This module provides configuration settings and presets for different optimization
scenarios to prevent research agent timeouts while maintaining quality.
"""

import random
from dataclasses import dataclass, replace
from enum import Enum
from typing import Any

from maverick_mcp.providers.openrouter_provider import TaskType


class OptimizationMode(str, Enum):
    """Optimization modes for different use cases."""

    EMERGENCY = "emergency"  # <20s - Ultra-fast, minimal quality
    FAST = "fast"  # 20-60s - Fast with reasonable quality
    BALANCED = "balanced"  # 60-180s - Balance speed and quality
    COMPREHENSIVE = "comprehensive"  # 180s+ - Full quality, time permitting


class ResearchComplexity(str, Enum):
    """Research complexity levels."""

    SIMPLE = "simple"  # Basic queries, single focus
    MODERATE = "moderate"  # Multi-faceted analysis
    COMPLEX = "complex"  # Deep analysis, multiple dimensions
    EXPERT = "expert"  # Highly specialized, technical


@dataclass
class OptimizationPreset:
    """Configuration preset for optimization settings.

    Instances are mutable. The presets stored on ``OptimizationPresets`` are
    shared by the whole process, so code that wants to tweak one should work
    on a copy (``dataclasses.replace(preset)``) rather than on the original.
    """

    # Model Selection Settings
    prefer_fast: bool = True
    prefer_cheap: bool = True
    prefer_quality: bool = False

    # Token Budgeting
    max_input_tokens: int = 8000
    max_output_tokens: int = 2000
    emergency_reserve_tokens: int = 200

    # Time Management
    search_time_allocation_pct: float = 0.20  # 20% for search
    analysis_time_allocation_pct: float = 0.60  # 60% for analysis
    synthesis_time_allocation_pct: float = 0.20  # 20% for synthesis

    # Content Processing
    max_sources: int = 10
    max_content_length_per_source: int = 2000
    parallel_batch_size: int = 3

    # Early Termination
    target_confidence: float = 0.75
    min_sources_before_termination: int = 3
    diminishing_returns_threshold: float = 0.05
    consensus_threshold: float = 0.8

    # Quality vs Speed Trade-offs
    use_content_filtering: bool = True
    use_parallel_processing: bool = True
    use_early_termination: bool = True
    use_optimized_prompts: bool = True


def _select_mode_for_time_budget(time_budget_seconds: float) -> OptimizationMode:
    """Map a time budget in seconds onto the optimization mode for that range.

    Budgets below 20s are EMERGENCY, below 60s FAST, below 180s BALANCED and
    everything else COMPREHENSIVE. Kept in one place so that preset selection
    and config creation cannot drift apart.
    """
    if time_budget_seconds < 20:
        return OptimizationMode.EMERGENCY
    if time_budget_seconds < 60:
        return OptimizationMode.FAST
    if time_budget_seconds < 180:
        return OptimizationMode.BALANCED
    return OptimizationMode.COMPREHENSIVE


class OptimizationPresets:
    """Predefined optimization presets for common scenarios."""

    EMERGENCY = OptimizationPreset(
        # Ultra-fast settings for <20 seconds
        prefer_fast=True,
        prefer_cheap=True,
        prefer_quality=False,
        max_input_tokens=2000,
        max_output_tokens=500,
        max_sources=3,
        max_content_length_per_source=800,
        parallel_batch_size=5,  # Aggressive batching
        target_confidence=0.6,  # Lower bar
        min_sources_before_termination=2,
        search_time_allocation_pct=0.15,
        analysis_time_allocation_pct=0.70,
        synthesis_time_allocation_pct=0.15,
    )

    FAST = OptimizationPreset(
        # Fast settings for 20-60 seconds
        prefer_fast=True,
        prefer_cheap=True,
        prefer_quality=False,
        max_input_tokens=4000,
        max_output_tokens=1000,
        max_sources=6,
        max_content_length_per_source=1200,
        parallel_batch_size=3,
        target_confidence=0.70,
        min_sources_before_termination=3,
    )

    BALANCED = OptimizationPreset(
        # Balanced settings for 60-180 seconds
        prefer_fast=False,
        prefer_cheap=True,
        prefer_quality=False,
        max_input_tokens=8000,
        max_output_tokens=2000,
        max_sources=10,
        max_content_length_per_source=2000,
        parallel_batch_size=2,
        target_confidence=0.75,
        min_sources_before_termination=3,
    )

    COMPREHENSIVE = OptimizationPreset(
        # Comprehensive settings for 180+ seconds
        prefer_fast=False,
        prefer_cheap=False,
        prefer_quality=True,
        max_input_tokens=12000,
        max_output_tokens=3000,
        max_sources=15,
        max_content_length_per_source=3000,
        parallel_batch_size=1,  # Less batching for quality
        target_confidence=0.80,
        min_sources_before_termination=5,
        use_early_termination=False,  # Allow full processing
        search_time_allocation_pct=0.25,
        analysis_time_allocation_pct=0.55,
        synthesis_time_allocation_pct=0.20,
    )

    @classmethod
    def get_preset(cls, mode: OptimizationMode) -> OptimizationPreset:
        """Get preset by optimization mode.

        Args:
            mode: Optimization mode to look up.

        Returns:
            The shared class-level preset for ``mode``. It is NOT a copy, so
            callers must not mutate it.

        Raises:
            KeyError: If ``mode`` is not one of the four known modes.
        """
        preset_map = {
            OptimizationMode.EMERGENCY: cls.EMERGENCY,
            OptimizationMode.FAST: cls.FAST,
            OptimizationMode.BALANCED: cls.BALANCED,
            OptimizationMode.COMPREHENSIVE: cls.COMPREHENSIVE,
        }
        return preset_map[mode]

    @classmethod
    def get_adaptive_preset(
        cls,
        time_budget_seconds: float,
        complexity: ResearchComplexity = ResearchComplexity.MODERATE,
        current_confidence: float = 0.0,
    ) -> OptimizationPreset:
        """Get adaptive preset based on time budget and complexity.

        Args:
            time_budget_seconds: Total time budget; selects the base preset.
            complexity: Research complexity; scales sources, input tokens and
                the confidence target relative to the base preset.
            current_confidence: Confidence already reached. Above 0.6 the
                result is loosened (lower target, fewer sources, fast models).

        Returns:
            A new ``OptimizationPreset``. The shared class-level presets are
            never modified, so repeated calls with the same arguments return
            equal results.
        """
        # Base preset selection by time
        base = cls.get_preset(_select_mode_for_time_budget(time_budget_seconds))

        # Adjust for complexity (all values are derived from the pristine base)
        complexity_adjustments: dict[ResearchComplexity, dict[str, Any]] = {
            ResearchComplexity.SIMPLE: {
                "max_sources": int(base.max_sources * 0.7),
                "target_confidence": base.target_confidence - 0.1,
                "prefer_cheap": True,
            },
            ResearchComplexity.MODERATE: {
                # No adjustments - use base preset
            },
            ResearchComplexity.COMPLEX: {
                "max_sources": int(base.max_sources * 1.3),
                "target_confidence": base.target_confidence + 0.05,
                "max_input_tokens": int(base.max_input_tokens * 1.2),
            },
            ResearchComplexity.EXPERT: {
                "max_sources": int(base.max_sources * 1.5),
                "target_confidence": base.target_confidence + 0.1,
                "max_input_tokens": int(base.max_input_tokens * 1.4),
                "prefer_quality": True,
            },
        }

        # Copy the shared preset while applying the adjustments. Mutating the
        # class-level object in place would leak the adjustments into every
        # later call and compound them.
        preset = replace(base, **complexity_adjustments.get(complexity, {}))

        # Adjust for current confidence
        if current_confidence > 0.6:
            # Already have good confidence, can be more aggressive with speed
            preset.target_confidence = max(preset.target_confidence - 0.1, 0.6)
            preset.max_sources = int(preset.max_sources * 0.8)
            preset.prefer_fast = True

        return preset


class ModelSelectionStrategy:
    """Strategies for model selection in different scenarios."""

    TIME_CRITICAL_MODELS = [
        "google/gemini-2.5-flash",  # 199 tokens/sec - FASTEST
        "openai/gpt-4o-mini",  # 126 tokens/sec - Most cost-effective
        "openai/gpt-5-nano",  # 180 tokens/sec - High speed
        "anthropic/claude-3.5-haiku",  # 65.6 tokens/sec - Fallback
    ]

    BALANCED_MODELS = [
        "google/gemini-2.5-flash",  # 199 tokens/sec - Speed-optimized
        "openai/gpt-4o-mini",  # 126 tokens/sec - Cost & speed balance
        "deepseek/deepseek-r1",  # 90+ tokens/sec - Good value
        "anthropic/claude-sonnet-4",  # High quality when needed
        "google/gemini-2.5-pro",  # Comprehensive analysis
        "openai/gpt-5",  # Fallback option
    ]

    QUALITY_MODELS = [
        "google/gemini-2.5-pro",
        "anthropic/claude-opus-4.1",
        "anthropic/claude-sonnet-4",
    ]

    @classmethod
    def get_model_priority(
        cls,
        time_remaining: float,
        task_type: TaskType,
        complexity: ResearchComplexity = ResearchComplexity.MODERATE,
    ) -> list[str]:
        """Get prioritized model list for selection.

        Args:
            time_remaining: Seconds left in the budget.
            task_type: Accepted for interface compatibility; the current
                implementation does not consult it.
            complexity: Research complexity; COMPLEX and EXPERT put the
                quality models first when at least 60 seconds remain.

        Returns:
            A new list of model identifiers, highest priority first. The
            lists are concatenated as-is, so an identifier that appears in
            two source lists appears twice in the result.
        """
        if time_remaining < 30:
            # Emergency mode: ultra-fast models for <30s timeouts (prioritize speed)
            return cls.TIME_CRITICAL_MODELS[:2]  # Use only the 2 fastest models
        if time_remaining < 60:
            # Mix of fast and balanced models (speed-first approach)
            return cls.TIME_CRITICAL_MODELS[:3] + cls.BALANCED_MODELS[:2]
        if complexity in (ResearchComplexity.COMPLEX, ResearchComplexity.EXPERT):
            return cls.QUALITY_MODELS + cls.BALANCED_MODELS
        return cls.BALANCED_MODELS + cls.TIME_CRITICAL_MODELS


class PromptOptimizationSettings:
    """Settings for prompt optimization strategies."""

    # Template selection based on time constraints
    EMERGENCY_MAX_WORDS = {"content_analysis": 50, "synthesis": 40, "validation": 30}

    FAST_MAX_WORDS = {"content_analysis": 150, "synthesis": 200, "validation": 100}

    STANDARD_MAX_WORDS = {"content_analysis": 500, "synthesis": 800, "validation": 300}

    # Confidence-based prompt modifications
    HIGH_CONFIDENCE_ADDITIONS = [
        "Focus on validation and contradictory evidence since confidence is already high.",
        "Look for edge cases and potential risks that may have been missed.",
        "Verify consistency across sources and identify any conflicting information.",
    ]

    LOW_CONFIDENCE_ADDITIONS = [
        "Look for strong supporting evidence to build confidence in findings.",
        "Identify the most credible sources and weight them appropriately.",
        "Focus on consensus indicators and corroborating evidence.",
    ]

    @classmethod
    def get_word_limit(cls, prompt_type: str, time_remaining: float) -> int:
        """Get word limit for prompt type based on time remaining.

        Args:
            prompt_type: Key into the word-limit tables, e.g.
                ``"content_analysis"``, ``"synthesis"`` or ``"validation"``.
            time_remaining: Seconds left; below 15 uses the emergency table,
                below 45 the fast table, otherwise the standard table.

        Returns:
            The word limit for ``prompt_type``. Unknown prompt types fall back
            to 50, 150 or 500 words for the respective table.
        """
        if time_remaining < 15:
            return cls.EMERGENCY_MAX_WORDS.get(prompt_type, 50)
        if time_remaining < 45:
            return cls.FAST_MAX_WORDS.get(prompt_type, 150)
        return cls.STANDARD_MAX_WORDS.get(prompt_type, 500)

    @classmethod
    def get_confidence_instruction(cls, confidence_level: float) -> str:
        """Get confidence-based instruction addition.

        Args:
            confidence_level: Current confidence.

        Returns:
            A randomly chosen entry of ``HIGH_CONFIDENCE_ADDITIONS`` when the
            level is above 0.7, of ``LOW_CONFIDENCE_ADDITIONS`` when it is
            below 0.4, and an empty string in between.
        """
        if confidence_level > 0.7:
            return random.choice(cls.HIGH_CONFIDENCE_ADDITIONS)
        if confidence_level < 0.4:
            return random.choice(cls.LOW_CONFIDENCE_ADDITIONS)
        return ""


class OptimizationConfig:
    """Main configuration class for LLM optimizations."""

    def __init__(
        self,
        mode: OptimizationMode = OptimizationMode.BALANCED,
        complexity: ResearchComplexity = ResearchComplexity.MODERATE,
        time_budget_seconds: float = 120.0,
        target_confidence: float = 0.75,
        custom_preset: OptimizationPreset | None = None,
    ):
        """Initialize optimization configuration.

        Args:
            mode: Optimization mode. Stored and reported by ``to_dict``; the
                preset itself is derived from ``time_budget_seconds`` and
                ``complexity`` (or taken from ``custom_preset``), not from
                ``mode``.
            complexity: Research complexity level
            time_budget_seconds: Total time budget
            target_confidence: Target confidence threshold. Any value other
                than the default 0.75 overrides the preset's own target;
                passing exactly 0.75 leaves the preset's target untouched.
            custom_preset: Custom preset overriding preset selection. It is
                copied, so the caller's object is never modified.
        """
        self.mode = mode
        self.complexity = complexity
        self.time_budget_seconds = time_budget_seconds
        self.target_confidence = target_confidence

        # Get optimization preset. Either way the config ends up owning a
        # private copy, so the override below cannot leak into the shared
        # presets or into the caller's custom preset.
        if custom_preset is not None:
            self.preset = replace(custom_preset)
        else:
            self.preset = OptimizationPresets.get_adaptive_preset(
                time_budget_seconds, complexity, 0.0
            )

        # Override target confidence if specified
        if target_confidence != 0.75:  # Non-default value
            self.preset.target_confidence = target_confidence

    def get_phase_time_budget(self, phase: str) -> float:
        """Get time budget for specific research phase.

        Args:
            phase: ``"search"``, ``"analysis"`` or ``"synthesis"``.

        Returns:
            The total time budget multiplied by the preset's allocation for
            ``phase``; unknown phases receive a 0.33 share.
        """
        allocation_map = {
            "search": self.preset.search_time_allocation_pct,
            "analysis": self.preset.analysis_time_allocation_pct,
            "synthesis": self.preset.synthesis_time_allocation_pct,
        }
        return self.time_budget_seconds * allocation_map.get(phase, 0.33)

    def should_use_optimization(self, optimization_name: str) -> bool:
        """Check if specific optimization should be used.

        Args:
            optimization_name: ``"content_filtering"``,
                ``"parallel_processing"``, ``"early_termination"`` or
                ``"optimized_prompts"``.

        Returns:
            The preset's flag for that optimization; unknown names yield True.
        """
        optimization_map = {
            "content_filtering": self.preset.use_content_filtering,
            "parallel_processing": self.preset.use_parallel_processing,
            "early_termination": self.preset.use_early_termination,
            "optimized_prompts": self.preset.use_optimized_prompts,
        }
        return optimization_map.get(optimization_name, True)

    def get_model_selection_params(self) -> dict[str, Any]:
        """Get model selection parameters."""
        return {
            "prefer_fast": self.preset.prefer_fast,
            "prefer_cheap": self.preset.prefer_cheap,
            "prefer_quality": self.preset.prefer_quality,
            "max_tokens": self.preset.max_output_tokens,
            "complexity": self.complexity,
        }

    def get_token_allocation_params(self) -> dict[str, Any]:
        """Get token allocation parameters."""
        return {
            "max_input_tokens": self.preset.max_input_tokens,
            "max_output_tokens": self.preset.max_output_tokens,
            "emergency_reserve": self.preset.emergency_reserve_tokens,
        }

    def get_content_filtering_params(self) -> dict[str, Any]:
        """Get content filtering parameters."""
        return {
            "max_sources": self.preset.max_sources,
            "max_content_length": self.preset.max_content_length_per_source,
            "enabled": self.preset.use_content_filtering,
        }

    def get_parallel_processing_params(self) -> dict[str, Any]:
        """Get parallel processing parameters."""
        return {
            "batch_size": self.preset.parallel_batch_size,
            "enabled": self.preset.use_parallel_processing,
        }

    def get_early_termination_params(self) -> dict[str, Any]:
        """Get early termination parameters."""
        return {
            "target_confidence": self.preset.target_confidence,
            "min_sources": self.preset.min_sources_before_termination,
            "diminishing_returns_threshold": self.preset.diminishing_returns_threshold,
            "consensus_threshold": self.preset.consensus_threshold,
            "enabled": self.preset.use_early_termination,
        }

    def to_dict(self) -> dict[str, Any]:
        """Convert configuration to dictionary.

        Returns:
            A nested dict of plain values. The top-level
            ``"target_confidence"`` is the constructor argument, while
            ``"preset"["target_confidence"]`` is the value in effect on the
            preset; the two can differ.
        """
        return {
            "mode": self.mode.value,
            "complexity": self.complexity.value,
            "time_budget_seconds": self.time_budget_seconds,
            "target_confidence": self.target_confidence,
            "preset": {
                "prefer_fast": self.preset.prefer_fast,
                "prefer_cheap": self.preset.prefer_cheap,
                "prefer_quality": self.preset.prefer_quality,
                "max_input_tokens": self.preset.max_input_tokens,
                "max_output_tokens": self.preset.max_output_tokens,
                "max_sources": self.preset.max_sources,
                "parallel_batch_size": self.preset.parallel_batch_size,
                "target_confidence": self.preset.target_confidence,
                "optimizations_enabled": {
                    "content_filtering": self.preset.use_content_filtering,
                    "parallel_processing": self.preset.use_parallel_processing,
                    "early_termination": self.preset.use_early_termination,
                    "optimized_prompts": self.preset.use_optimized_prompts,
                },
            },
        }


# Convenience functions for common configurations
def create_emergency_config(time_budget: float = 15.0) -> OptimizationConfig:
    """Create emergency optimization configuration."""
    return OptimizationConfig(
        mode=OptimizationMode.EMERGENCY,
        time_budget_seconds=time_budget,
        target_confidence=0.6,
    )


def create_fast_config(time_budget: float = 45.0) -> OptimizationConfig:
    """Create fast optimization configuration."""
    return OptimizationConfig(
        mode=OptimizationMode.FAST,
        time_budget_seconds=time_budget,
        target_confidence=0.7,
    )


def create_balanced_config(time_budget: float = 120.0) -> OptimizationConfig:
    """Create balanced optimization configuration."""
    return OptimizationConfig(
        mode=OptimizationMode.BALANCED,
        time_budget_seconds=time_budget,
        target_confidence=0.75,
    )


def create_comprehensive_config(time_budget: float = 300.0) -> OptimizationConfig:
    """Create comprehensive optimization configuration."""
    return OptimizationConfig(
        mode=OptimizationMode.COMPREHENSIVE,
        time_budget_seconds=time_budget,
        target_confidence=0.8,
    )


def create_adaptive_config(
    time_budget_seconds: float,
    complexity: ResearchComplexity = ResearchComplexity.MODERATE,
    current_confidence: float = 0.0,
) -> OptimizationConfig:
    """Create adaptive configuration based on runtime parameters.

    Args:
        time_budget_seconds: Total time budget; also selects the mode.
        complexity: Research complexity level.
        current_confidence: Confidence already reached. Above 0.6 the target
            confidence passed to the config is lowered by 0.15 from 0.75.

    Returns:
        An ``OptimizationConfig`` whose mode matches the time budget.
    """
    return OptimizationConfig(
        # Auto-select mode based on time budget
        mode=_select_mode_for_time_budget(time_budget_seconds),
        complexity=complexity,
        time_budget_seconds=time_budget_seconds,
        target_confidence=0.75 - (0.15 if current_confidence > 0.6 else 0),
    )

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/wshobson/maverick-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.