"""
MCP Elicitations Module
This module implements elicitation schemas and extraction logic for
structured data extraction from natural language conversations.
Elicitations allow users to provide information naturally while the system
extracts and validates structured data automatically.
Key Features:
- Schema-based elicitation definitions
- Automatic data extraction from natural language
- Missing field detection and prompting
- Validation and error handling
- Integration with MCP tools
"""
import json
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, Any, List, Optional, Union

from src.observability import get_logger
logger = get_logger(__name__)
class FieldType(str, Enum):
    """Kinds of value an elicitation field may declare.

    Every member also subclasses ``str``, so a member compares equal to its
    raw string value (for example ``FieldType.EMAIL == "email"``).
    """

    # Primitive values
    STRING = "string"
    INTEGER = "integer"
    FLOAT = "float"
    BOOLEAN = "boolean"

    # Contact-style values
    EMAIL = "email"
    PHONE = "phone"
    URL = "url"

    # Constrained, temporal and long-form values
    ENUM = "enum"
    DATE = "date"
    TEXTAREA = "textarea"
@dataclass
class ElicitationField:
    """Definition of a single field that an elicitation schema can extract.

    Attributes:
        name: Key under which the extracted value is stored.
        type: The kind of value the field holds.
        required: Whether the field must be present in extracted data.
        description: Human-readable explanation of the field.
        enum_options: Allowed values, when the field is restricted to a set.
        validation_pattern: Regular expression source used to validate values.
        example: Sample value; may be a string, number or boolean.
        prompt_if_missing: Question to ask the user when the value is absent.
    """

    name: str
    type: FieldType
    required: bool = True
    description: str = ""
    enum_options: Optional[List[str]] = None
    validation_pattern: Optional[str] = None
    # Examples follow the field's own value type, so they are not always str.
    example: Optional[Any] = None
    prompt_if_missing: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for LLM.

        Returns:
            A dict that always contains ``name``, ``type`` (the enum's string
            value), ``required`` and ``description``. Optional attributes are
            included only when they carry a value.
        """
        result: Dict[str, Any] = {
            "name": self.name,
            "type": self.type.value,
            "required": self.required,
            "description": self.description,
        }
        if self.enum_options:
            result["enum_options"] = self.enum_options
        if self.validation_pattern:
            result["validation_pattern"] = self.validation_pattern
        # Compare against None rather than truthiness: legitimate examples
        # such as False or 0 must not be dropped from the serialized schema.
        if self.example is not None:
            result["example"] = self.example
        if self.prompt_if_missing:
            result["prompt_if_missing"] = self.prompt_if_missing
        return result
@dataclass
class ElicitationSchema:
    """Named group of field definitions describing one kind of record.

    Attributes:
        name: Identifier of the schema.
        description: What the schema extracts.
        fields: Field definitions, in declaration order.
        example_prompt: Sample user input for this schema.
    """

    name: str
    description: str
    fields: List[ElicitationField]
    example_prompt: str

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the schema, including every field, for the LLM."""
        serialized_fields = []
        for spec in self.fields:
            serialized_fields.append(spec.to_dict())
        payload = {
            "schema_name": self.name,
            "description": self.description,
            "fields": serialized_fields,
            "example_prompt": self.example_prompt,
        }
        return payload

    def get_required_fields(self) -> List[ElicitationField]:
        """Return the fields flagged as required, in declaration order."""
        required_specs = []
        for spec in self.fields:
            if spec.required:
                required_specs.append(spec)
        return required_specs

    def get_missing_fields(self, data: Dict[str, Any]) -> List[ElicitationField]:
        """Return required fields whose name is not a key of ``data``.

        Only key presence is checked; a key holding an empty value still
        counts as provided.
        """
        return [
            spec
            for spec in self.fields
            if spec.required and spec.name not in data
        ]
@dataclass
class ElicitationResult:
    """Outcome of one elicitation extraction attempt.

    Only ``success`` is mandatory. The container attributes default to fresh
    empty objects per instance, and the optional strings default to ``None``.
    """

    # Overall outcome flag.
    success: bool
    # Extracted values keyed by field name.
    extracted_data: Dict[str, Any] = field(default_factory=dict)
    # Names of fields reported as missing.
    missing_fields: List[str] = field(default_factory=list)
    # Reported confidence for the extraction.
    confidence_score: float = 0.0
    # Questions to put to the user next.
    follow_up_questions: List[str] = field(default_factory=list)
    # Unprocessed response text, when one is available.
    raw_response: Optional[str] = None
    # Description of what went wrong, when something did.
    error_message: Optional[str] = None
# Predefined Elicitation Schemas
# Schema for creating a vendor. Only "vendor_name" is required; every other
# field is optional.
VENDOR_CREATION_SCHEMA = ElicitationSchema(
    name="vendor_creation",
    description="Extract vendor information for creating a new vendor",
    example_prompt=(
        "I want to add a new vendor called 'AutoParts Pro' that sells car parts. "
        "Their email is contact@autopartspro.com and they're located in Detroit."
    ),
    fields=[
        # --- Identity and contact details ---
        ElicitationField(
            name="vendor_name",
            type=FieldType.STRING,
            description="The legal name of the vendor company",
            required=True,
            example="AutoParts Pro Inc.",
            prompt_if_missing="What is the vendor's company name?",
        ),
        ElicitationField(
            name="vendor_email",
            type=FieldType.EMAIL,
            description="Primary contact email for the vendor",
            required=False,
            example="contact@autopartspro.com",
            validation_pattern=r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
        ),
        ElicitationField(
            name="vendor_phone",
            type=FieldType.PHONE,
            description="Primary contact phone number",
            required=False,
            example="(555) 123-4567",
            validation_pattern=r"^[\d\s\-\(\)\+]+$",
        ),
        ElicitationField(
            name="vendor_website",
            type=FieldType.URL,
            description="Company website URL",
            required=False,
            example="https://www.autopartspro.com",
        ),
        # --- Business profile ---
        ElicitationField(
            name="vendor_domain",
            type=FieldType.STRING,
            description="Business domain or industry",
            required=False,
            example="Automotive Parts",
            prompt_if_missing="What industry or domain does this vendor operate in?",
        ),
        ElicitationField(
            name="description",
            type=FieldType.TEXTAREA,
            description="Brief description of the vendor's business",
            required=False,
            example="Specializes in aftermarket automotive parts and accessories",
        ),
        # --- Internal categorization ---
        ElicitationField(
            name="department_id",
            type=FieldType.INTEGER,
            description="Department ID for categorization",
            required=False,
            example=1,
        ),
        ElicitationField(
            name="billing_type_id",
            type=FieldType.INTEGER,
            description="Billing type ID",
            required=False,
            example=1,
        ),
        ElicitationField(
            name="contract_required",
            type=FieldType.BOOLEAN,
            description="Whether a contract is required for this vendor",
            required=False,
            example=False,
        ),
        ElicitationField(
            name="manager_name",
            type=FieldType.STRING,
            description="Name of the vendor manager",
            required=False,
            example="John Smith",
        ),
        ElicitationField(
            name="location",
            type=FieldType.STRING,
            description="Physical location of the vendor",
            required=False,
            example="Detroit, MI",
        ),
        ElicitationField(
            name="gl_code",
            type=FieldType.STRING,
            description="General ledger code",
            required=False,
            example="6001",
        ),
    ],
)
# Schema for creating a contract. "contract_name" and "vendor_id" are
# required; the remaining fields are optional.
CONTRACT_CREATION_SCHEMA = ElicitationSchema(
    name="contract_creation",
    description="Extract contract information for creating a new contract",
    example_prompt=(
        "Create a contract with AutoParts Pro for $50,000 annually, starting "
        "January 1st and ending December 31st, with auto-renewal enabled."
    ),
    fields=[
        # --- Required identification ---
        ElicitationField(
            name="contract_name",
            type=FieldType.STRING,
            description="Name of the contract",
            required=True,
            example="AutoParts Supply Agreement",
        ),
        ElicitationField(
            name="vendor_id",
            type=FieldType.INTEGER,
            description="ID of the vendor for this contract",
            required=True,
            example=123,
        ),
        # --- Optional commercial terms ---
        ElicitationField(
            name="annual_amount",
            type=FieldType.FLOAT,
            description="Annual contract amount",
            required=False,
            example=50000.00,
        ),
        ElicitationField(
            name="start_date",
            type=FieldType.DATE,
            description="Contract start date",
            required=False,
            example="2024-01-01",
        ),
        ElicitationField(
            name="end_date",
            type=FieldType.DATE,
            description="Contract end date",
            required=False,
            example="2024-12-31",
        ),
        ElicitationField(
            name="auto_renew",
            type=FieldType.BOOLEAN,
            description="Whether contract auto-renews",
            required=False,
            example=True,
        ),
    ],
)
# Schema registry: maps each schema name to its predefined schema object.
ELICITATION_SCHEMAS = dict(
    vendor_creation=VENDOR_CREATION_SCHEMA,
    contract_creation=CONTRACT_CREATION_SCHEMA,
)
def get_schema(schema_name: str) -> Optional[ElicitationSchema]:
    """Look up a registered elicitation schema.

    Args:
        schema_name: Key of the schema in ``ELICITATION_SCHEMAS``.

    Returns:
        The matching schema, or ``None`` when the name is not registered.
    """
    if schema_name in ELICITATION_SCHEMAS:
        return ELICITATION_SCHEMAS[schema_name]
    return None
def create_elicitation_prompt(
    schema: ElicitationSchema,
    user_input: str,
    existing_data: Optional[Dict[str, Any]] = None
) -> str:
    """
    Assemble the LLM prompt that requests structured extraction.

    The prompt is built from up to three sections: the serialized schema with
    the quoted user input, an optional section listing data that has already
    been extracted, and fixed instructions with the expected response format.

    Args:
        schema: The elicitation schema to use
        user_input: The user's natural language input
        existing_data: Any data already extracted; the section is omitted
            when this is ``None`` or empty

    Returns:
        Prompt string for LLM
    """
    schema_json = json.dumps(schema.to_dict(), indent=2)
    sections = [
        f"""You are an expert data extraction assistant. Extract structured information from the user's input according to the following schema:
SCHEMA:
{schema_json}
USER INPUT:
"{user_input}"
"""
    ]

    # Earlier results are shown only when there actually are some.
    if existing_data:
        known_json = json.dumps(existing_data, indent=2)
        sections.append(f"""
ALREADY EXTRACTED DATA:
{known_json}
Please extract any ADDITIONAL information from the user input and merge with existing data.
""")

    # Fixed tail: extraction rules followed by the JSON response template.
    sections.append("""
INSTRUCTIONS:
1. Extract all information that matches the schema fields
2. For required fields, do your best to extract or infer the value
3. If a field is not mentioned in the user input, don't include it in the result
4. Return a JSON object with the extracted data
5. Include only fields that were actually found in the input
6. For confidence, include a confidence_score (0.0-1.0) based on how well the input matches what's needed
RESPONSE FORMAT:
```json
{
"extracted_data": {
"field_name": "extracted_value",
...
},
"confidence_score": 0.8,
"missing_fields": ["field1", "field2"],
"follow_up_questions": ["What is the vendor's email?"]
}
```
""")
    return "".join(sections)
# Fallback patterns applied when an EMAIL/PHONE field declares no
# validation_pattern of its own.
_DEFAULT_EMAIL_PATTERN = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
_DEFAULT_PHONE_PATTERN = r"^[\d\s\-\(\)\+]+$"


def _matches_pattern(pattern: str, value: Any) -> bool:
    """Return True when ``value`` is a string matching the regex ``pattern``."""
    # Non-string values (numbers, lists, ...) can never satisfy a text format.
    if not isinstance(value, str):
        return False
    return re.match(pattern, value) is not None


def validate_extracted_data(
    schema: ElicitationSchema,
    data: Dict[str, Any]
) -> List[str]:
    """
    Validate extracted data against schema.

    Checks that every required field is present, then format-checks the
    values of EMAIL, PHONE, URL and ENUM fields. Empty values (``None``,
    ``""``, ...) are not format-checked. Other field types are accepted
    as-is.

    Args:
        schema: The elicitation schema
        data: Extracted data to validate

    Returns:
        List of validation errors; empty when the data is valid
    """
    errors: List[str] = []
    for spec in schema.fields:
        if spec.name not in data:
            if spec.required:
                errors.append(f"Required field '{spec.name}' is missing")
            continue

        value = data[spec.name]
        if not value:
            # Nothing to format-check for empty values.
            continue

        if spec.type == FieldType.EMAIL:
            # validation_pattern is stored as regex source text, so it has to
            # go through the re module; fall back to the default pattern when
            # the field does not declare one.
            pattern = spec.validation_pattern or _DEFAULT_EMAIL_PATTERN
            if not _matches_pattern(pattern, value):
                errors.append(f"Invalid email format for '{spec.name}'")
        elif spec.type == FieldType.PHONE:
            pattern = spec.validation_pattern or _DEFAULT_PHONE_PATTERN
            if not _matches_pattern(pattern, value):
                errors.append(f"Invalid phone format for '{spec.name}'")
        elif spec.type == FieldType.URL:
            # Guard with isinstance so a non-string value is reported as
            # invalid instead of raising on .startswith.
            if not (isinstance(value, str) and value.startswith(('http://', 'https://'))):
                errors.append(f"Invalid URL format for '{spec.name}'")
        elif spec.type == FieldType.ENUM:
            if spec.enum_options and value not in spec.enum_options:
                errors.append(f"Invalid value '{value}' for '{spec.name}'. Must be one of: {spec.enum_options}")
    return errors
def _extract_json_payload(content: str) -> str:
    """Return the JSON text of an LLM reply, unwrapping a ```json fence if present."""
    fence_open = "```json"
    start = content.find(fence_open)
    if start == -1:
        return content.strip()
    start += len(fence_open)
    end = content.find("```", start)
    if end == -1:
        # Unterminated fence: take the remainder. Slicing with -1 would
        # silently drop the final character of the payload.
        end = len(content)
    return content[start:end].strip()


async def extract_with_elicitation(
    schema_name: str,
    user_input: str,
    provider,  # BaseLLMProvider
    existing_data: Optional[Dict[str, Any]] = None
) -> ElicitationResult:
    """
    Extract structured data using elicitation.

    Looks up the schema, builds the extraction prompt, awaits
    ``provider.generate`` and parses the JSON found in the response content.
    The extracted data is then validated against the schema. This function
    does not raise: every failure is logged and reported through the
    returned result.

    Args:
        schema_name: Name of the elicitation schema
        user_input: User's natural language input
        provider: LLM provider for extraction; must expose an awaitable
            ``generate(messages=..., tools=..., temperature=...)`` whose
            result has a ``content`` attribute
        existing_data: Any data already extracted

    Returns:
        ElicitationResult with extracted data. ``success`` is True only when
        the response parsed and validation produced no errors; otherwise
        ``error_message`` describes the failure.
    """
    try:
        schema = get_schema(schema_name)
        if not schema:
            return ElicitationResult(
                success=False,
                error_message=f"Unknown schema: {schema_name}"
            )

        # Create extraction prompt
        prompt = create_elicitation_prompt(schema, user_input, existing_data)
        logger.info(
            "elicitation_extraction_started",
            schema=schema_name,
            input_length=len(user_input)
        )

        # Call LLM for extraction
        response = await provider.generate(
            messages=[{"role": "user", "content": prompt}],
            tools=None,
            temperature=0.0  # Deterministic for extraction
        )

        # A missing body is treated as empty text so it is reported as a
        # parse failure (with raw_response) instead of raising TypeError.
        content = response.content or ""

        try:
            extracted = json.loads(_extract_json_payload(content))
        except json.JSONDecodeError as e:
            logger.error(
                "elicitation_json_parse_error",
                schema=schema_name,
                error=str(e),
                response_content=content[:200]
            )
            return ElicitationResult(
                success=False,
                error_message=f"Failed to parse LLM response: {e}",
                raw_response=response.content
            )

        # Valid JSON can still have the wrong shape (a list, a string, or a
        # non-object "extracted_data"); report that explicitly.
        extracted_data = (
            extracted.get("extracted_data") if isinstance(extracted, dict) else None
        )
        if extracted_data is None and isinstance(extracted, dict):
            # Absent or null "extracted_data" means nothing was extracted.
            extracted_data = {}
        if not isinstance(extracted_data, dict):
            shape_error = "expected a JSON object with an 'extracted_data' object"
            logger.error(
                "elicitation_json_parse_error",
                schema=schema_name,
                error=shape_error,
                response_content=content[:200]
            )
            return ElicitationResult(
                success=False,
                error_message=f"Failed to parse LLM response: {shape_error}",
                raw_response=response.content
            )

        # Validate extracted data
        validation_errors = validate_extracted_data(schema, extracted_data)
        result = ElicitationResult(
            success=not validation_errors,
            extracted_data=extracted_data,
            # "or" also replaces explicit JSON nulls with the empty defaults.
            missing_fields=extracted.get("missing_fields") or [],
            confidence_score=extracted.get("confidence_score") or 0.0,
            follow_up_questions=extracted.get("follow_up_questions") or [],
            raw_response=response.content,
            error_message="; ".join(validation_errors) if validation_errors else None
        )
        logger.info(
            "elicitation_extraction_completed",
            schema=schema_name,
            success=result.success,
            fields_extracted=len(result.extracted_data),
            confidence=result.confidence_score
        )
        return result
    except Exception as e:
        # Boundary handler: provider or validation failures are converted
        # into a failed result rather than propagated to the caller.
        logger.error(
            "elicitation_extraction_failed",
            schema=schema_name,
            error=str(e),
            error_type=type(e).__name__
        )
        return ElicitationResult(
            success=False,
            error_message=f"Elicitation failed: {e}"
        )
def generate_follow_up_prompt(
    schema: ElicitationSchema,
    result: ElicitationResult
) -> str:
    """
    Generate a follow-up prompt for missing information.

    The prompt lists the follow-up questions carried by ``result``, followed
    by one line per required schema field that is absent from
    ``result.extracted_data``. Each such field uses its ``prompt_if_missing``
    text, or its description when no prompt is set.

    Args:
        schema: The elicitation schema
        result: Previous extraction result

    Returns:
        Follow-up prompt string, or "" when there is nothing to ask
    """
    if not result.missing_fields and not result.follow_up_questions:
        return ""

    question_lines = [f"• {question}\n" for question in result.follow_up_questions]
    required_lines = [
        f"• {spec.prompt_if_missing or spec.description}\n"
        for spec in schema.get_missing_fields(result.extracted_data)
    ]

    # Only optional fields are missing and no questions were supplied:
    # return nothing instead of a header with no items under it.
    if not question_lines and not required_lines:
        return ""

    # Name the entity after the schema ("vendor_creation" -> "vendor") so the
    # wording is correct for every "<entity>_creation" schema, not only vendors.
    suffix = "_creation"
    if schema.name.endswith(suffix) and len(schema.name) > len(suffix):
        entity = schema.name[:-len(suffix)].replace("_", " ")
        header = f"I need some additional information to create the {entity}:\n\n"
    else:
        header = "I need some additional information to continue:\n\n"

    parts = [header]
    parts.extend(question_lines)
    if required_lines:
        parts.append("\nPlease provide:\n")
        parts.extend(required_lines)
    return "".join(parts)