"""
LLM Service - Backend service for LLM management and generation
Integrates with local-llm-mcp server for model management and chat
"""
import logging
from datetime import datetime
from typing import Any
from mcp_client import mcp_client
logger = logging.getLogger(__name__)
class LLMService:
"""Service for managing LLMs and chat operations"""
def __init__(self):
self.active_models: dict[str, dict[str, Any]] = {}
self.chat_sessions: dict[str, list[dict[str, str]]] = {}
self.personalities: dict[str, dict[str, str]] = {
"assistant": {
"name": "Assistant",
"system_prompt": "You are a helpful, harmless, and honest assistant.",
"temperature": 0.7,
"max_tokens": 2000,
},
"robotics_expert": {
"name": "Robotics Expert",
"system_prompt": "You are an expert in robotics, automation, and control systems. Provide technical, precise answers about robotics hardware, software, and best practices.",
"temperature": 0.5,
"max_tokens": 3000,
},
"code_assistant": {
"name": "Code Assistant",
"system_prompt": "You are an expert software engineer. Help with code, debugging, architecture, and best practices. Write clean, efficient, well-documented code.",
"temperature": 0.3,
"max_tokens": 4000,
},
"creative_writer": {
"name": "Creative Writer",
"system_prompt": "You are a creative and imaginative writer. Help with storytelling, creative content, and engaging narratives.",
"temperature": 0.9,
"max_tokens": 2000,
},
"analyst": {
"name": "Data Analyst",
"system_prompt": "You are a data analyst. Help analyze data, create insights, and explain statistical concepts clearly.",
"temperature": 0.4,
"max_tokens": 2500,
},
"teacher": {
"name": "Teacher",
"system_prompt": "You are a patient and knowledgeable teacher. Explain concepts clearly, provide examples, and help with learning.",
"temperature": 0.6,
"max_tokens": 2000,
},
}
async def list_models(self, provider: str | None = None) -> dict[str, Any]:
"""List all available models"""
try:
result = await mcp_client.call_tool(
"local_llm", "llm_models", {"operation": "list_models", "provider": provider}
)
if result.get("success"):
return {
"success": True,
"models": result.get("data", {}).get("models", []),
"providers": result.get("data", {}).get("providers", []),
}
return {"success": False, "error": result.get("error", "Unknown error")}
except Exception as e:
logger.error(f"Error listing models: {e}")
return {"success": False, "error": str(e)}
async def list_ollama_models(self) -> dict[str, Any]:
"""List Ollama models"""
try:
result = await mcp_client.call_tool(
"local_llm", "llm_models", {"operation": "ollama_list"}
)
if result.get("success"):
return {"success": True, "models": result.get("data", {}).get("models", [])}
return {"success": False, "error": result.get("error", "Unknown error")}
except Exception as e:
logger.error(f"Error listing Ollama models: {e}")
return {"success": False, "error": str(e)}
async def list_lmstudio_models(self) -> dict[str, Any]:
"""List LM Studio models"""
try:
result = await mcp_client.call_tool(
"local_llm", "llm_models", {"operation": "lmstudio_list"}
)
if result.get("success"):
return {"success": True, "models": result.get("data", {}).get("models", [])}
return {"success": False, "error": result.get("error", "Unknown error")}
except Exception as e:
logger.error(f"Error listing LM Studio models: {e}")
return {"success": False, "error": str(e)}
async def load_model(self, model_id: str, provider: str = "ollama") -> dict[str, Any]:
"""Load a model for inference"""
try:
operation = f"{provider}_load" if provider in ["ollama", "lmstudio"] else "load_model"
result = await mcp_client.call_tool(
"local_llm", "llm_models", {"operation": operation, "model_id": model_id}
)
if result.get("success"):
self.active_models[model_id] = {
"model_id": model_id,
"provider": provider,
"loaded_at": datetime.now().isoformat(),
"status": "loaded",
}
return {
"success": True,
"model_id": model_id,
"message": f"Model {model_id} loaded successfully",
}
return {"success": False, "error": result.get("error", "Unknown error")}
except Exception as e:
logger.error(f"Error loading model {model_id}: {e}")
return {"success": False, "error": str(e)}
async def unload_model(self, model_id: str, provider: str = "ollama") -> dict[str, Any]:
"""Unload a model"""
try:
operation = (
f"{provider}_unload" if provider in ["ollama", "lmstudio"] else "unload_model"
)
result = await mcp_client.call_tool(
"local_llm", "llm_models", {"operation": operation, "model_id": model_id}
)
if result.get("success"):
if model_id in self.active_models:
del self.active_models[model_id]
return {
"success": True,
"model_id": model_id,
"message": f"Model {model_id} unloaded successfully",
}
return {"success": False, "error": result.get("error", "Unknown error")}
except Exception as e:
logger.error(f"Error unloading model {model_id}: {e}")
return {"success": False, "error": str(e)}
async def pull_model(self, model_id: str, provider: str = "ollama") -> dict[str, Any]:
"""Pull/download a model"""
try:
operation = f"{provider}_pull" if provider == "ollama" else "pull_model"
result = await mcp_client.call_tool(
"local_llm", "llm_models", {"operation": operation, "model_id": model_id}
)
if result.get("success"):
return {
"success": True,
"model_id": model_id,
"message": f"Model {model_id} pulled successfully",
}
return {"success": False, "error": result.get("error", "Unknown error")}
except Exception as e:
logger.error(f"Error pulling model {model_id}: {e}")
return {"success": False, "error": str(e)}
async def generate_text(
self,
model: str,
prompt: str,
temperature: float = 0.7,
max_tokens: int | None = None,
**kwargs,
) -> dict[str, Any]:
"""Generate text from a prompt"""
try:
result = await mcp_client.call_tool(
"local_llm",
"llm_generation",
{
"operation": "generate_text",
"model": model,
"prompt": prompt,
"temperature": temperature,
"max_tokens": max_tokens,
**kwargs,
},
)
return result
except Exception as e:
logger.error(f"Error generating text: {e}")
return {"success": False, "error": str(e)}
async def chat_completion(
self,
model: str,
messages: list[dict[str, str]],
temperature: float = 0.7,
max_tokens: int | None = None,
personality: str | None = None,
**kwargs,
) -> dict[str, Any]:
"""Generate chat completion"""
try:
# Add system prompt if personality is specified
if personality and personality in self.personalities:
persona = self.personalities[personality]
system_message = {"role": "system", "content": persona["system_prompt"]}
# Insert system message if not already present
if not messages or messages[0].get("role") != "system":
messages = [system_message] + messages
# Override temperature if personality has one
if "temperature" in persona:
temperature = persona["temperature"]
# Override max_tokens if personality has one
if max_tokens is None and "max_tokens" in persona:
max_tokens = persona["max_tokens"]
result = await mcp_client.call_tool(
"local_llm",
"llm_generation",
{
"operation": "chat_completion",
"model": model,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
**kwargs,
},
)
return result
except Exception as e:
logger.error(f"Error in chat completion: {e}")
return {"success": False, "error": str(e)}
async def get_health(self) -> dict[str, Any]:
"""Get LLM service health"""
try:
result = await mcp_client.call_tool(
"local_llm", "llm_health", {"operation": "health_check"}
)
return result
except Exception as e:
logger.error(f"Error getting health: {e}")
return {"success": False, "error": str(e)}
async def get_system_info(self) -> dict[str, Any]:
"""Get system information"""
try:
result = await mcp_client.call_tool(
"local_llm", "llm_health", {"operation": "system_info"}
)
return result
except Exception as e:
logger.error(f"Error getting system info: {e}")
return {"success": False, "error": str(e)}
def get_personalities(self) -> dict[str, dict[str, str]]:
"""Get available personalities"""
return self.personalities
def add_personality(
self,
key: str,
name: str,
system_prompt: str,
temperature: float = 0.7,
max_tokens: int = 2000,
):
"""Add a custom personality"""
self.personalities[key] = {
"name": name,
"system_prompt": system_prompt,
"temperature": temperature,
"max_tokens": max_tokens,
}
def get_active_models(self) -> dict[str, dict[str, Any]]:
"""Get currently loaded models"""
return self.active_models
# Module-level singleton: import this shared instance rather than
# constructing LLMService directly, so all callers share the same
# active_models / chat_sessions / personalities state.
llm_service = LLMService()