Boring Gemini

model_router.py•8.7 KiB

""" Model Router for Boring V13.2 Intelligently routes LLM requests to the appropriate backend: - Local LLM (llama-cpp-python) for simple tasks or offline mode - API (Gemini/Claude) for complex tasks requiring more capability Features: - Automatic complexity assessment - Cost-aware routing - Graceful fallback between backends - Task-type optimization """ import logging from enum import Enum from typing import Any logger = logging.getLogger(__name__) class TaskComplexity(Enum): """Task complexity levels for routing decisions.""" SIMPLE = "simple" # Doc generation, formatting, simple completions MEDIUM = "medium" # Code review, test generation, refactoring COMPLEX = "complex" # Architecture, multi-file changes, debugging class ModelCapability(Enum): """Model capability tiers.""" LOCAL_TINY = "local_tiny" # <2B params, fast but limited LOCAL_SMALL = "local_small" # 2-7B params, good for most tasks API_FAST = "api_fast" # Gemini Flash, Claude Haiku API_PRO = "api_pro" # Gemini Pro, Claude Sonnet # Task type to complexity mapping TASK_COMPLEXITY_MAP = { # Simple tasks - can use local LLM "docstring": TaskComplexity.SIMPLE, "comment": TaskComplexity.SIMPLE, "format": TaskComplexity.SIMPLE, "rename": TaskComplexity.SIMPLE, "type_hint": TaskComplexity.SIMPLE, # Medium tasks - prefer API but can fallback to local "test_gen": TaskComplexity.MEDIUM, "code_review": TaskComplexity.MEDIUM, "refactor_small": TaskComplexity.MEDIUM, "explain": TaskComplexity.MEDIUM, # Complex tasks - require API "architecture": TaskComplexity.COMPLEX, "debug": TaskComplexity.COMPLEX, "refactor_large": TaskComplexity.COMPLEX, "multi_file": TaskComplexity.COMPLEX, } class ModelRouter: """ Routes LLM requests to appropriate backends based on task requirements. Usage: router = ModelRouter() response = router.complete("Generate a docstring for...", task_type="docstring") """ def __init__(self, prefer_local: bool = False, offline_mode: bool = False): """ Initialize the router. Args: prefer_local: Prefer local models when possible offline_mode: Force local-only operation """ self.prefer_local = prefer_local self.offline_mode = offline_mode self._local_llm: Any | None = None self._api_client: Any | None = None @classmethod def from_settings(cls) -> "ModelRouter": """Create router from Boring settings.""" try: from ..core.config import settings return cls(prefer_local=settings.OFFLINE_MODE, offline_mode=settings.OFFLINE_MODE) except (ImportError, AttributeError): return cls() @property def local_llm(self) -> Any | None: """Lazy-load local LLM.""" if self._local_llm is None: try: from .local_llm import LocalLLM self._local_llm = LocalLLM.from_settings() except ImportError: pass return self._local_llm @property def has_local(self) -> bool: """Check if local LLM is available.""" llm = self.local_llm return llm is not None and llm.is_available @property def has_api(self) -> bool: """Check if API is available.""" if self.offline_mode: return False # Check for API key import os return bool(os.environ.get("GOOGLE_API_KEY") or os.environ.get("ANTHROPIC_API_KEY")) def assess_complexity(self, prompt: str, task_type: str | None = None) -> TaskComplexity: """ Assess task complexity from prompt and task type. Args: prompt: The user prompt task_type: Optional task type hint Returns: Assessed complexity level """ # Use explicit task type if provided if task_type and task_type in TASK_COMPLEXITY_MAP: return TASK_COMPLEXITY_MAP[task_type] # Heuristic assessment prompt_lower = prompt.lower() # Simple indicators simple_keywords = ["docstring", "comment", "format", "rename", "type hint"] if any(kw in prompt_lower for kw in simple_keywords): return TaskComplexity.SIMPLE # Complex indicators complex_keywords = ["architecture", "refactor entire", "multiple files", "debug", "fix bug"] if any(kw in prompt_lower for kw in complex_keywords): return TaskComplexity.COMPLEX # Check prompt length as proxy for complexity if len(prompt) < 200: return TaskComplexity.SIMPLE elif len(prompt) > 1000: return TaskComplexity.COMPLEX return TaskComplexity.MEDIUM def select_backend(self, complexity: TaskComplexity) -> str: """ Select the best backend for a task. Args: complexity: Task complexity Returns: Backend name: "local", "api_fast", or "api_pro" """ # Offline mode: always use local if self.offline_mode: if self.has_local: return "local" raise RuntimeError("Offline mode enabled but no local LLM available") # Simple tasks: prefer local if available and user prefers if complexity == TaskComplexity.SIMPLE: if self.prefer_local and self.has_local: return "local" if self.has_api: return "api_fast" if self.has_local: return "local" # Medium tasks: prefer API, fallback to local if complexity == TaskComplexity.MEDIUM: if self.has_api: return "api_fast" if self.has_local: return "local" # Complex tasks: require API pro tier if complexity == TaskComplexity.COMPLEX: if self.has_api: return "api_pro" # Fallback chain if self.has_api: return "api_fast" if self.has_local: return "local" raise RuntimeError("No LLM backend available (install local or configure API)") def complete( self, prompt: str, task_type: str | None = None, max_tokens: int = 1024, temperature: float = 0.7, force_backend: str | None = None, ) -> str | None: """ Generate a completion using the best available backend. Args: prompt: The prompt to complete task_type: Optional task type for routing max_tokens: Maximum tokens to generate temperature: Sampling temperature force_backend: Force a specific backend Returns: Generated text or None on failure """ # Determine backend if force_backend: backend = force_backend else: complexity = self.assess_complexity(prompt, task_type) backend = self.select_backend(complexity) logger.debug(f"Routing to backend: {backend}") # Route to backend if backend == "local": if self.local_llm and self.local_llm.is_available: return self.local_llm.complete(prompt, max_tokens, temperature) return None # API backends (placeholder - integrate with existing llm module) if backend in ("api_fast", "api_pro"): return self._call_api(prompt, backend, max_tokens, temperature) return None def _call_api(self, prompt: str, tier: str, max_tokens: int, temperature: float) -> str | None: """Call API backend (Gemini or Claude).""" try: from .sdk import BoringSDK # Select model based on tier if tier == "api_pro": model = "gemini-2.5-pro" else: model = "gemini-2.5-flash" sdk = BoringSDK() response = sdk.generate(prompt, model=model, max_tokens=max_tokens) return response except Exception as e: logger.error(f"API call failed: {e}") return None # Singleton instance _router: ModelRouter | None = None def get_router() -> ModelRouter: """Get the global model router instance.""" global _router if _router is None: _router = ModelRouter.from_settings() return _router def route_completion(prompt: str, task_type: str | None = None, **kwargs) -> str | None: """ Convenience function to route a completion request. Args: prompt: The prompt to complete task_type: Optional task type hint **kwargs: Additional arguments for complete() Returns: Generated text or None """ return get_router().complete(prompt, task_type=task_type, **kwargs)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Boring206/boring-gemini'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

model_router.py•8.7 KiB