schema_generator.py
"""llm-assisted schema generation from python signatures and docstrings."""
from __future__ import annotations
import inspect
import json
from typing import Any, Dict, Optional, get_origin, get_args
from pydantic import BaseModel, Field, create_model
import openai
from .reflection import ToolMetadata
from .config import get_settings
class SchemaGenerator:
    """generates json schemas for sdk tools with llm assistance."""

    PYTHON_TO_JSON_TYPE = {
        str: "string",
        int: "integer",
        float: "number",
        bool: "boolean",
        list: "array",
        dict: "object",
        type(None): "null",
    }

    def __init__(self, use_llm: bool = True):
        """
        initialize schema generator.

        args:
            use_llm: whether to use llm for enhancing schemas
        """
        self.use_llm = use_llm
        if use_llm:
            settings = get_settings()
            openai.api_key = settings.openai_api_key
            self.model = settings.openai_model

    def generate_schema(self, tool: ToolMetadata) -> Dict[str, Any]:
        """generate mcp-compatible json schema for a tool."""
        schema: Dict[str, Any] = {
            "type": "object",
            "properties": {},
            "required": [],
        }
        if not tool.signature:
            return schema
        # extract from signature
        for param_name, param in tool.signature.parameters.items():
            if param_name in ["self", "cls"]:
                continue
            prop_schema = self._param_to_schema(param)
            schema["properties"][param_name] = prop_schema
            if param.default is inspect.Parameter.empty:
                schema["required"].append(param_name)
        # enhance with llm if enabled
        if self.use_llm:
            schema = self._enhance_with_llm(tool, schema)
        return schema
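
    # example (sketch, llm enhancement disabled): for a hypothetical tool wrapping
    #     def search(query: str, limit: int = 10): ...
    # the base schema would be:
    #     {"type": "object",
    #      "properties": {"query": {"type": "string"},
    #                     "limit": {"type": "integer", "default": 10}},
    #      "required": ["query"]}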

    def _param_to_schema(self, param: inspect.Parameter) -> Dict[str, Any]:
        """convert a parameter to json schema."""
        prop: Dict[str, Any] = {}
        # extract type from annotation
        if param.annotation is not inspect.Parameter.empty:
            param_type = param.annotation
            # unwrap Optional[X] / X | None and generic containers
            origin = get_origin(param_type)
            if origin is not None:
                args = get_args(param_type)
                if origin is Union or (hasattr(types, "UnionType") and origin is types.UnionType):
                    # optional / union: use the first non-none member
                    param_type = next((a for a in args if a is not type(None)), str)
                else:
                    # generic container (e.g. list[str]): map its base type
                    param_type = origin
            json_type = self.PYTHON_TO_JSON_TYPE.get(param_type, "string")
            prop["type"] = json_type
        else:
            prop["type"] = "string"  # default when no annotation is given
        # add default if present
        if param.default is not inspect.Parameter.empty:
            try:
                # try to serialize default value
                json.dumps(param.default)
                prop["default"] = param.default
            except (TypeError, ValueError):
                # can't serialize, describe it instead
                prop["description"] = f"default: {param.default}"
        return prop
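
    # e.g. (sketch): a parameter annotated `Optional[int]` or `int | None` maps to
    # {"type": "integer"}; a default that json can't serialize (e.g. a datetime)
    # is dropped and noted in the description instead.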

    def _enhance_with_llm(self, tool: ToolMetadata, schema: Dict[str, Any]) -> Dict[str, Any]:
        """use llm to fill gaps in schema (descriptions, enums, examples)."""
        try:
            prompt = self._build_enhancement_prompt(tool, schema)
            response = openai.chat.completions.create(
                model=self.model,
                temperature=0,
                messages=[
                    {"role": "system", "content": "you are an expert at generating json schemas for api tools. respond only with valid json."},
                    {"role": "user", "content": prompt},
                ],
            )
            enhanced = json.loads(response.choices[0].message.content)
            # merge enhanced schema
            if "properties" in enhanced:
                for prop_name, prop_schema in enhanced["properties"].items():
                    if prop_name in schema["properties"]:
                        schema["properties"][prop_name].update(prop_schema)
            if "description" in enhanced:
                schema["description"] = enhanced["description"]
            return schema
        except Exception as e:
            # fallback to base schema if llm fails
            print(f"warning: llm enhancement failed for {tool.fq_name}: {e}")
            return schema
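
    # the merge above expects the model to echo the schema structure, e.g. (sketch):
    #     {"description": "search the product catalog",
    #      "properties": {"query": {"description": "free-text search terms"}}}
    # property names not already in the base schema, and any other top-level keys,
    # are ignored.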

    def _build_enhancement_prompt(self, tool: ToolMetadata, schema: Dict[str, Any]) -> str:
        """build prompt for llm to enhance schema."""
        return f"""
enhance this json schema for the tool '{tool.fq_name}'.

current schema:
{json.dumps(schema, indent=2)}

docstring:
{tool.docstring or "none"}

add:
1. description for each parameter (what it does, constraints, format hints)
2. description for the tool itself
3. enum values where obvious (e.g., "status" might be ["active", "inactive"])
4. examples in descriptions where helpful

respond with the enhanced json schema only, keeping the same structure.
"""

    def generate_mcp_tool(self, tool: ToolMetadata) -> Dict[str, Any]:
        """generate full mcp tool definition."""
        schema = self.generate_schema(tool)
        return {
            "name": tool.fq_name,
            "description": tool.docstring or f"call {tool.fq_name}",
            "inputSchema": schema,
        }
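

# usage sketch (assumes llm enhancement is disabled; the real ToolMetadata comes
# from .reflection -- the stand-in below only mirrors the three attributes this
# module reads: fq_name, docstring, and signature).
if __name__ == "__main__":
    from types import SimpleNamespace

    def search(query: str, limit: int = 10) -> list:
        """search the catalog and return up to `limit` results."""
        ...

    fake_tool = SimpleNamespace(
        fq_name="catalog.search",
        docstring=inspect.getdoc(search),
        signature=inspect.signature(search),
    )
    generator = SchemaGenerator(use_llm=False)
    print(json.dumps(generator.generate_mcp_tool(fake_tool), indent=2))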