Claude-LMStudio Bridge
by infinitimeless
import sys
import traceback
import os
import json
import logging
from typing import Any, Dict, List, Union
from mcp.server.fastmcp import FastMCP
import httpx
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.StreamHandler(sys.stderr)
    ]
)
# Log startup message
logging.info("Starting LMStudio bridge server...")
try:
    # ===== Configuration =====
    # Load from environment variables with defaults
    LMSTUDIO_HOST = os.getenv("LMSTUDIO_HOST", "127.0.0.1")
    LMSTUDIO_PORT = os.getenv("LMSTUDIO_PORT", "1234")
    LMSTUDIO_API_URL = f"http://{LMSTUDIO_HOST}:{LMSTUDIO_PORT}/v1"
    DEBUG = os.getenv("DEBUG", "false").lower() in ("true", "1", "yes")

    # Enable more verbose logging if debug mode is requested
    if DEBUG:
        logging.getLogger().setLevel(logging.DEBUG)
        logging.debug("Debug mode enabled")

    logging.info(f"Configured LM Studio API URL: {LMSTUDIO_API_URL}")
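    # A usage sketch for the settings above (the values are illustrative, and
    # the script filename is an assumption, not part of the project):
    #
    #   LMSTUDIO_HOST=192.168.1.50 LMSTUDIO_PORT=1234 DEBUG=true \
    #       python lmstudio_bridge.py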

    # Initialize FastMCP server
    mcp = FastMCP("lmstudio-bridge")

    # ===== Helper Functions =====
    async def call_lmstudio_api(
        endpoint: str,
        payload: Dict[str, Any],
        timeout: float = 60.0,
        method: str = "POST"
    ) -> Dict[str, Any]:
        """Unified API communication function with better error handling."""
        headers = {
            "Content-Type": "application/json",
            "User-Agent": "claude-lmstudio-bridge/1.0"
        }
        url = f"{LMSTUDIO_API_URL}/{endpoint}"
        logging.debug(f"Making {method} request to {url}")
        logging.debug(f"Payload: {json.dumps(payload, indent=2)}")

        try:
            async with httpx.AsyncClient() as client:
                if method.upper() == "GET":
                    # Read-only endpoints such as /models expect GET, not POST
                    response = await client.get(url, headers=headers, timeout=timeout)
                else:
                    response = await client.post(
                        url,
                        json=payload,
                        headers=headers,
                        timeout=timeout
                    )

                # Report specific error messages where the API provides them
                if response.status_code != 200:
                    error_message = f"LM Studio API error: {response.status_code}"
                    try:
                        error_json = response.json()
                        if "error" in error_json:
                            if isinstance(error_json["error"], dict) and "message" in error_json["error"]:
                                error_message += f" - {error_json['error']['message']}"
                            else:
                                error_message += f" - {error_json['error']}"
                    except Exception:
                        # The body was not JSON; fall back to the raw text
                        error_message += f" - {response.text[:100]}"
                    logging.error(f"Error response: {error_message}")
                    return {"error": error_message}

                result = response.json()
                logging.debug(f"Response received: {json.dumps(result, indent=2, default=str)[:200]}...")
                return result
        except httpx.RequestError as e:
            logging.error(f"Request error: {str(e)}")
            return {"error": f"Connection error: {str(e)}"}
        except Exception as e:
            logging.error(f"Unexpected error: {str(e)}")
            return {"error": f"Unexpected error: {str(e)}"}
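
    # A minimal usage sketch for the helper above, from inside an async
    # function (the payload values are hypothetical; a model must already be
    # loaded in LM Studio for the call to succeed):
    #
    #   result = await call_lmstudio_api(
    #       "chat/completions",
    #       {"messages": [{"role": "user", "content": "Hello"}], "max_tokens": 32},
    #   )
    #   if "error" not in result:
    #       text = result["choices"][0]["message"]["content"]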

    def prepare_chat_messages(messages_input: Union[str, List, Dict]) -> List[Dict[str, str]]:
        """Convert various input formats to what LM Studio expects."""
        try:
            # If messages_input is a string
            if isinstance(messages_input, str):
                # Try to parse it as JSON
                try:
                    parsed = json.loads(messages_input)
                    if isinstance(parsed, list):
                        return parsed
                    else:
                        # Parsed but not a list, so treat it as a user message
                        return [{"role": "user", "content": messages_input}]
                except json.JSONDecodeError:
                    # Not valid JSON, so assume it is a plain message
                    return [{"role": "user", "content": messages_input}]
            # If it is already a list, pass it through unchanged
            elif isinstance(messages_input, list):
                return messages_input
            # If it is a dict, assume it is a single message
            elif isinstance(messages_input, dict) and "content" in messages_input:
                if "role" not in messages_input:
                    messages_input["role"] = "user"
                return [messages_input]
            # Any other type: stringify it and wrap it as a user message
            else:
                return [{"role": "user", "content": str(messages_input)}]
        except Exception as e:
            logging.error(f"Error preparing chat messages: {str(e)}")
            # Fall back to the simplest possible format
            return [{"role": "user", "content": str(messages_input)}]
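
    # Illustrative conversions performed by prepare_chat_messages (example
    # values, not fixtures from the project):
    #
    #   prepare_chat_messages('Hello')
    #     -> [{"role": "user", "content": "Hello"}]
    #   prepare_chat_messages('[{"role": "system", "content": "Be brief"}]')
    #     -> [{"role": "system", "content": "Be brief"}]
    #   prepare_chat_messages({"content": "Hi"})
    #     -> [{"role": "user", "content": "Hi"}]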

    # ===== MCP Tools =====
    @mcp.tool()
    async def check_lmstudio_connection() -> str:
        """Check if the LM Studio server is running and accessible.

        Returns:
            Connection status and model information
        """
        try:
            # Query the models endpoint to confirm the server is reachable
            async with httpx.AsyncClient() as client:
                response = await client.get(f"{LMSTUDIO_API_URL}/models", timeout=5.0)
                if response.status_code == 200:
                    models_data = response.json()
                    if "data" in models_data and len(models_data["data"]) > 0:
                        active_model = models_data["data"][0]["id"]
                        return f"✅ Connected to LM Studio. Active model: {active_model}"
                    else:
                        return "✅ Connected to LM Studio, but no models are currently loaded"
                else:
                    return f"❌ LM Studio returned an error: {response.status_code}"
        except Exception as e:
            return f"❌ Failed to connect to LM Studio: {str(e)}"

    @mcp.tool()
    async def list_lmstudio_models() -> str:
        """List available LLM models in LM Studio.

        Returns:
            A formatted list of available models with their details.
        """
        logging.info("list_lmstudio_models function called")
        try:
            # The /models endpoint is read-only, so request it with GET
            models_response = await call_lmstudio_api("models", {}, timeout=10.0, method="GET")

            # Check for errors from the API helper
            if "error" in models_response:
                return f"Error listing models: {models_response['error']}"
            if not models_response or "data" not in models_response:
                return "No models found or unexpected response format."

            models = models_response["data"]
            model_info = []
            for model in models:
                model_info.append(f"ID: {model.get('id', 'Unknown')}")
                model_info.append(f"Name: {model.get('name', 'Unknown')}")
                if model.get('description'):
                    model_info.append(f"Description: {model.get('description')}")
                model_info.append("---")

            if not model_info:
                return "No models available in LM Studio."
            return "\n".join(model_info)
        except Exception as e:
            logging.error(f"Unexpected error in list_lmstudio_models: {str(e)}")
            traceback.print_exc(file=sys.stderr)
            return f"Unexpected error: {str(e)}"

    @mcp.tool()
    async def generate_text(
        prompt: str,
        model_id: str = "",
        max_tokens: int = 1000,
        temperature: float = 0.7
    ) -> str:
        """Generate text using a local LLM in LM Studio.

        Args:
            prompt: The text prompt to send to the model
            model_id: ID of the model to use (leave empty for the default model)
            max_tokens: Maximum number of tokens in the response (default: 1000)
            temperature: Randomness of the output (0-1, default: 0.7)

        Returns:
            The generated text from the local LLM
        """
        logging.info("generate_text function called")
        try:
            # Validate inputs
            if not prompt or not prompt.strip():
                return "Error: Prompt cannot be empty."
            if max_tokens < 1:
                return "Error: max_tokens must be a positive integer."
            if temperature < 0 or temperature > 1:
                return "Error: temperature must be between 0 and 1."

            # Prepare the completions payload
            payload = {
                "prompt": prompt,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "stream": False
            }

            # Add the model if one was specified
            if model_id and model_id.strip():
                payload["model"] = model_id.strip()

            # Make the request through the API helper
            response = await call_lmstudio_api("completions", payload)

            # Check for errors from the API helper
            if "error" in response:
                return f"Error generating text: {response['error']}"

            # Extract and return the generated text
            if "choices" in response and len(response["choices"]) > 0:
                return response["choices"][0].get("text", "")
            return "No response generated."
        except Exception as e:
            logging.error(f"Unexpected error in generate_text: {str(e)}")
            traceback.print_exc(file=sys.stderr)
            return f"Unexpected error: {str(e)}"

    @mcp.tool()
    async def chat_completion(
        messages: str,
        model_id: str = "",
        max_tokens: int = 1000,
        temperature: float = 0.7
    ) -> str:
        """Generate a chat completion using a local LLM in LM Studio.

        Args:
            messages: JSON string of messages in the format [{"role": "user", "content": "Hello"}, ...],
                or a simple text string which will be treated as a user message
            model_id: ID of the model to use (leave empty for the default model)
            max_tokens: Maximum number of tokens in the response (default: 1000)
            temperature: Randomness of the output (0-1, default: 0.7)

        Returns:
            The generated text from the local LLM
        """
        logging.info("chat_completion function called")
        try:
            # Standardize the message format using the helper function
            messages_formatted = prepare_chat_messages(messages)
            logging.debug(f"Formatted messages: {json.dumps(messages_formatted, indent=2)}")

            # Validate inputs
            if not messages_formatted:
                return "Error: At least one message is required."
            if max_tokens < 1:
                return "Error: max_tokens must be a positive integer."
            if temperature < 0 or temperature > 1:
                return "Error: temperature must be between 0 and 1."

            # Prepare the chat completions payload
            payload = {
                "messages": messages_formatted,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "stream": False
            }

            # Add the model if one was specified
            if model_id and model_id.strip():
                payload["model"] = model_id.strip()

            # Make the request through the API helper
            response = await call_lmstudio_api("chat/completions", payload)

            # Check for errors from the API helper
            if "error" in response:
                return f"Error generating chat completion: {response['error']}"

            # Extract and return the generated text
            if "choices" in response and len(response["choices"]) > 0:
                choice = response["choices"][0]
                if "message" in choice and "content" in choice["message"]:
                    return choice["message"]["content"]
            return "No response generated."
        except Exception as e:
            logging.error(f"Unexpected error in chat_completion: {str(e)}")
            traceback.print_exc(file=sys.stderr)
            return f"Unexpected error: {str(e)}"

    if __name__ == "__main__":
        logging.info("Starting server with stdio transport...")
        # Initialize and run the server
        mcp.run(transport='stdio')
except Exception as e:
    logging.critical(f"CRITICAL ERROR: {str(e)}")
    logging.critical("Traceback:")
    traceback.print_exc(file=sys.stderr)
    sys.exit(1)
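
# To expose this bridge to Claude Desktop, register it as an MCP server in
# claude_desktop_config.json. A sketch of the expected entry follows; the
# command and script path are placeholders to adjust for your machine:
#
#   {
#     "mcpServers": {
#       "lmstudio-bridge": {
#         "command": "python",
#         "args": ["/path/to/lmstudio_bridge.py"]
#       }
#     }
#   }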