"""LLM integration tools for testing prompts.
This module provides tools for executing prompts with an LLM to test
the full workflow of prompt retrieval, rendering, and execution.
"""
import json
import logging
import os
import re
import time
from typing import Annotated, Any
import httpx
from fastmcp import Context
from ..connection import ConnectionError, ConnectionManager
from ..mcp_instance import mcp
logger = logging.getLogger(__name__)
@mcp.tool
async def execute_prompt_with_llm(
prompt_name: Annotated[str, "Name of the prompt to execute"],
ctx: Context,
prompt_arguments: Annotated[dict[str, Any] | str | None, "Arguments to pass to the MCP prompt (JSON object or string)"] = None,
fill_variables: Annotated[dict[str, Any] | str | None, "Template variables to fill in prompt messages (JSON object or string)"] = None,
llm_config: Annotated[dict[str, Any] | str | None, "LLM configuration (url, model, api_key, etc.)"] = None
) -> dict[str, Any]:
"""Execute a prompt with an LLM and return the response.
This tool performs the complete workflow:
1. Retrieves the prompt from the connected MCP server with prompt_arguments
2. Optionally fills template variables in the prompt messages
3. Sends the prompt messages to an LLM
4. Returns the LLM's response along with metadata
Supports two prompt patterns:
- Standard MCP prompts: Pass arguments via prompt_arguments, server handles substitution
- Template variables: Use fill_variables to replace {variable} placeholders in messages
    Args:
        prompt_name: Name of the prompt to execute
        ctx: FastMCP context used to send progress and error updates to the caller
        prompt_arguments: Arguments to pass to the MCP prompt, as a dict or a
            JSON-encoded object string (default: None, treated as {})
        fill_variables: Template variables to fill in prompt messages, as a dict
            or a JSON-encoded object string (default: None).
            Used for manual string replacement of {variable_name} patterns.
            Values are JSON-serialized before substitution if they're not strings.
        llm_config: Optional LLM configuration, as a dict or a JSON-encoded
            object string, with keys:
            - url: LLM endpoint URL (default: from LLM_URL env var)
            - model: Model name (default: from LLM_MODEL_NAME env var)
            - api_key: API key (default: from LLM_API_KEY env var)
            - max_tokens: Maximum tokens in response (default: 1000)
            - temperature: Sampling temperature (default: 0.7)
Returns:
Dictionary with execution results including:
- success: True if execution succeeded
- prompt: Original prompt information
- llm_request: The request sent to the LLM
- llm_response: The LLM's response
        - parsed_response: JSON parsed from the response when it contains a
            fenced ```json block or is a bare JSON object, otherwise None
- metadata: Timing and configuration information
    Error handling:
        This tool does not raise; failures are returned as an error dict whose
        error_type is one of:
        - not_connected: No active MCP connection
        - invalid_arguments: A JSON string parameter could not be parsed
        - prompt_not_found: Prompt doesn't exist on the server
        - llm_config_error: Missing or invalid LLM configuration
        - llm_request_error: LLM request failed or timed out
        - execution_error: Any other failure during execution
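    Example (illustrative; the prompt name and values below are hypothetical,
    and the exact invocation depends on how your MCP client calls tools):
        execute_prompt_with_llm(
            prompt_name="code_review",
            prompt_arguments={"language": "python"},
            fill_variables={"code": "print('hello')"},
            llm_config={"max_tokens": 500, "temperature": 0.2},
        )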
"""
start_time = time.perf_counter()
try:
# Parse JSON string parameters if needed
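        # (clients may pass dict-valued parameters as JSON-encoded strings)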
if isinstance(prompt_arguments, str):
try:
prompt_arguments = json.loads(prompt_arguments)
except json.JSONDecodeError as e:
return {
"success": False,
"error": {
"error_type": "invalid_arguments",
"message": f"prompt_arguments is not valid JSON: {str(e)}",
"details": {"raw_value": prompt_arguments[:200]},
"suggestion": "Provide a valid JSON object or dictionary",
},
"metadata": {"request_time_ms": 0},
}
if isinstance(fill_variables, str):
try:
fill_variables = json.loads(fill_variables)
except json.JSONDecodeError as e:
return {
"success": False,
"error": {
"error_type": "invalid_arguments",
"message": f"fill_variables is not valid JSON: {str(e)}",
"details": {"raw_value": fill_variables[:200]},
"suggestion": "Provide a valid JSON object or dictionary",
},
"metadata": {"request_time_ms": 0},
}
if isinstance(llm_config, str):
try:
llm_config = json.loads(llm_config)
except json.JSONDecodeError as e:
return {
"success": False,
"error": {
"error_type": "invalid_arguments",
"message": f"llm_config is not valid JSON: {str(e)}",
"details": {"raw_value": llm_config[:200]},
"suggestion": "Provide a valid JSON object or dictionary",
},
"metadata": {"request_time_ms": 0},
}
# Set default for prompt_arguments
if prompt_arguments is None:
prompt_arguments = {}
# Verify connection exists
client, state = ConnectionManager.require_connection()
# User-facing progress update
await ctx.info(f"Executing prompt '{prompt_name}' with LLM")
# Detailed technical log
logger.info(
f"Executing prompt '{prompt_name}' with LLM",
extra={
"prompt_name": prompt_name,
"arguments": prompt_arguments,
"has_fill_variables": fill_variables is not None,
},
)
# Get the prompt from the MCP server
prompt_start = time.perf_counter()
result = await client.get_prompt(prompt_name, prompt_arguments)
prompt_elapsed_ms = (time.perf_counter() - prompt_start) * 1000
# Extract messages
messages: list[dict[str, Any]] = []
if hasattr(result, "messages") and result.messages:
for message in result.messages:
msg_dict: dict[str, Any] = {"role": message.role}
# Extract content
if hasattr(message, "content"):
content = message.content
                    # Text content blocks expose .text; anything else is stringified
                    if hasattr(content, "text"):
                        msg_dict["content"] = content.text
                    else:
                        msg_dict["content"] = str(content)
messages.append(msg_dict)
# Fill template variables if provided
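        # e.g. a message "Review this {language} code" with fill_variables={"language": "python"}
        # becomes "Review this python code" (illustrative values)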
if fill_variables:
logger.debug(f"Filling template variables: {list(fill_variables.keys())}")
for msg in messages:
if "content" in msg and isinstance(msg["content"], str):
content_str = msg["content"]
# Fill each variable
for var_name, var_value in fill_variables.items():
placeholder = "{" + var_name + "}"
# Convert value to string (JSON serialize if not a string)
if isinstance(var_value, str):
replacement = var_value
else:
replacement = json.dumps(var_value, indent=2)
content_str = content_str.replace(placeholder, replacement)
msg["content"] = content_str
# Get LLM configuration
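        # Explicit llm_config values take precedence over environment variables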
if llm_config is None:
llm_config = {}
llm_url = llm_config.get("url") or os.getenv("LLM_URL")
llm_model = llm_config.get("model") or os.getenv("LLM_MODEL_NAME")
llm_api_key = llm_config.get("api_key") or os.getenv("LLM_API_KEY")
max_tokens = llm_config.get("max_tokens", 1000)
temperature = llm_config.get("temperature", 0.7)
if not all([llm_url, llm_model, llm_api_key]):
return {
"success": False,
"error": {
"error_type": "llm_config_error",
"message": "Missing LLM configuration. Provide llm_config or set LLM_URL, LLM_MODEL_NAME, and LLM_API_KEY environment variables",
"details": {
"has_url": bool(llm_url),
"has_model": bool(llm_model),
"has_api_key": bool(llm_api_key),
},
"suggestion": "Set LLM_URL, LLM_MODEL_NAME, and LLM_API_KEY in your .env file",
},
"metadata": {
"request_time_ms": round((time.perf_counter() - start_time) * 1000, 2),
},
}
# Prepare LLM request
llm_request = {
"model": llm_model,
"messages": messages,
"max_tokens": max_tokens,
"temperature": temperature,
}
# User-facing progress update
await ctx.info(f"Sending request to LLM endpoint: {llm_url}")
# Send to LLM
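        # llm_url is treated as an OpenAI-compatible base URL (e.g. ending in /v1);
        # the /chat/completions path is appended below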
llm_start = time.perf_counter()
async with httpx.AsyncClient(timeout=60.0) as http_client:
response = await http_client.post(
f"{llm_url}/chat/completions",
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {llm_api_key}",
},
json=llm_request,
)
llm_elapsed_ms = (time.perf_counter() - llm_start) * 1000
total_elapsed_ms = (time.perf_counter() - start_time) * 1000
if response.status_code != 200:
logger.error(
f"LLM request failed with status {response.status_code}",
extra={
"status_code": response.status_code,
"response_text": response.text[:500],
},
)
return {
"success": False,
"error": {
"error_type": "llm_request_error",
"message": f"LLM request failed with status {response.status_code}",
"details": {
"status_code": response.status_code,
"response_text": response.text[:500],
},
"suggestion": "Check LLM endpoint configuration and API key",
},
"metadata": {
"request_time_ms": round(total_elapsed_ms, 2),
},
}
# Parse LLM response
llm_result = response.json()
llm_response_text = llm_result["choices"][0]["message"]["content"]
# Try to extract and parse JSON if present
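        # Handles both fenced blocks (```json ... ```) and bare JSON object responses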
parsed_response = None
json_match = re.search(r"```json\s*(.*?)\s*```", llm_response_text, re.DOTALL)
if json_match:
try:
parsed_response = json.loads(json_match.group(1))
except json.JSONDecodeError as e:
logger.warning(f"Failed to parse extracted JSON: {e}")
elif llm_response_text.strip().startswith("{"):
try:
parsed_response = json.loads(llm_response_text)
except json.JSONDecodeError:
pass # Not valid JSON, leave as None
# User-facing success update
await ctx.info(f"Prompt '{prompt_name}' executed successfully with LLM")
# Detailed technical log
logger.info(
f"Prompt '{prompt_name}' executed successfully with LLM",
extra={
"prompt_name": prompt_name,
"prompt_ms": prompt_elapsed_ms,
"llm_ms": llm_elapsed_ms,
"total_ms": total_elapsed_ms,
},
)
return {
"success": True,
"prompt": {
"name": prompt_name,
"arguments": prompt_arguments,
"message_count": len(messages),
},
"llm_request": llm_request,
"llm_response": {
"text": llm_response_text,
"usage": llm_result.get("usage", {}),
"model": llm_result.get("model"),
},
"parsed_response": parsed_response,
"metadata": {
"prompt_retrieval_ms": round(prompt_elapsed_ms, 2),
"llm_execution_ms": round(llm_elapsed_ms, 2),
"total_time_ms": round(total_elapsed_ms, 2),
"server_url": state.server_url,
"llm_endpoint": llm_url,
"llm_model": llm_model,
},
}
except ConnectionError as e:
elapsed_ms = (time.perf_counter() - start_time) * 1000
# User-facing error update
await ctx.error(f"Not connected when executing prompt '{prompt_name}': {str(e)}")
# Detailed technical log
logger.error(
f"Not connected when executing prompt '{prompt_name}': {str(e)}",
extra={"prompt_name": prompt_name, "duration_ms": elapsed_ms},
)
return {
"success": False,
"error": {
"error_type": "not_connected",
"message": str(e),
"details": {"prompt_name": prompt_name},
"suggestion": "Use connect_to_server() to establish a connection first",
},
"metadata": {
"request_time_ms": round(elapsed_ms, 2),
},
}
except Exception as e:
elapsed_ms = (time.perf_counter() - start_time) * 1000
# Determine error type
error_type = "execution_error"
suggestion = "Check the prompt name, arguments, and LLM configuration"
error_msg = str(e).lower()
if "not found" in error_msg or "unknown prompt" in error_msg:
error_type = "prompt_not_found"
suggestion = f"Prompt '{prompt_name}' does not exist on the server"
elif "timeout" in error_msg or "connection" in error_msg:
error_type = "llm_request_error"
suggestion = "LLM request timed out or connection failed"
# User-facing error update
await ctx.error(f"Failed to execute prompt '{prompt_name}' with LLM: {str(e)}")
# Detailed technical log
logger.error(
f"Failed to execute prompt '{prompt_name}' with LLM: {str(e)}",
extra={
"prompt_name": prompt_name,
"error_type": error_type,
"duration_ms": elapsed_ms,
},
)
ConnectionManager.increment_stat("errors")
return {
"success": False,
"error": {
"error_type": error_type,
"message": f"Failed to execute prompt with LLM: {str(e)}",
"details": {
"prompt_name": prompt_name,
"exception_type": type(e).__name__,
},
"suggestion": suggestion,
},
"metadata": {
"request_time_ms": round(elapsed_ms, 2),
},
}