Crawl4AI+SearXNG MCP Server

utils.py•12.6 KiB

""" Validation utility functions for knowledge graph validator. Provides standalone utility functions for validating parameters, calculating confidence scores, detecting hallucinations, and checking knowledge graph membership. """ from dataclasses import dataclass, field from enum import Enum from typing import Any class ValidationStatus(Enum): """Status of a validation result""" VALID = "VALID" INVALID = "INVALID" UNCERTAIN = "UNCERTAIN" NOT_FOUND = "NOT_FOUND" @dataclass class ValidationResult: """Result of validating a single element""" status: "ValidationStatus" confidence: float # 0.0 to 1.0 message: str details: dict[str, Any] = field(default_factory=dict) suggestions: list[str] = field(default_factory=list) @dataclass class ScriptValidationResult: """Complete validation results for a script.""" script_path: str analysis_result: Any import_validations: list[Any] = field(default_factory=list) class_validations: list[Any] = field(default_factory=list) method_validations: list[Any] = field(default_factory=list) attribute_validations: list[Any] = field(default_factory=list) function_validations: list[Any] = field(default_factory=list) overall_confidence: float = 0.0 hallucinations_detected: list[dict[str, Any]] = field(default_factory=list) def validate_parameters( expected_params: list[str], provided_args: list[str], provided_kwargs: dict[str, str], ) -> ValidationResult: """ Validate function/method parameters with comprehensive support. Validates positional and keyword arguments against expected parameters, supporting varargs, varkwargs, and keyword-only parameters. Args: expected_params: List of expected parameter definitions. Format: "name:type" or "[keyword_only] name:type=default" provided_args: List of provided positional arguments. provided_kwargs: Dictionary of provided keyword arguments. Returns: ValidationResult indicating if parameters are valid, with confidence score and detailed message. 
""" if not expected_params: return ValidationResult( status=ValidationStatus.UNCERTAIN, confidence=0.5, message="Parameter information not available", ) # Parse expected parameters - handle detailed format required_positional = [] optional_positional = [] keyword_only_required = [] keyword_only_optional = [] has_varargs = False has_varkwargs = False for param in expected_params: # Handle detailed format: "[keyword_only] name:type=default" or "name:type" param_clean = param.strip() # Check for parameter kind prefix kind = "positional" if param_clean.startswith("["): end_bracket = param_clean.find("]") if end_bracket > 0: kind = param_clean[1:end_bracket] param_clean = param_clean[end_bracket+1:].strip() # Check for varargs/varkwargs if param_clean.startswith("*") and not param_clean.startswith("**"): has_varargs = True continue if param_clean.startswith("**"): has_varkwargs = True continue # Parse name and check if optional if ":" in param_clean: param_name = param_clean.split(":")[0] is_optional = "=" in param_clean if kind == "keyword_only": if is_optional: keyword_only_optional.append(param_name) else: keyword_only_required.append(param_name) elif is_optional: optional_positional.append(param_name) else: required_positional.append(param_name) # Count provided parameters provided_positional_count = len(provided_args) provided_keyword_names = set(provided_kwargs.keys()) # Validate positional arguments min_required_positional = len(required_positional) max_allowed_positional = len(required_positional) + len(optional_positional) if not has_varargs and provided_positional_count > max_allowed_positional: return ValidationResult( status=ValidationStatus.INVALID, confidence=0.8, message=f"Too many positional arguments: provided {provided_positional_count}, max allowed {max_allowed_positional}", ) if provided_positional_count < min_required_positional: return ValidationResult( status=ValidationStatus.INVALID, confidence=0.8, message=f"Too few positional arguments: 
provided {provided_positional_count}, required {min_required_positional}", ) # Validate keyword arguments all_valid_kwarg_names = set(required_positional + optional_positional + keyword_only_required + keyword_only_optional) invalid_kwargs = provided_keyword_names - all_valid_kwarg_names if invalid_kwargs and not has_varkwargs: return ValidationResult( status=ValidationStatus.INVALID, confidence=0.7, message=f"Invalid keyword arguments: {list(invalid_kwargs)}", suggestions=[f"Valid parameters: {list(all_valid_kwarg_names)}"], ) # Check required keyword-only arguments missing_required_kwargs = set(keyword_only_required) - provided_keyword_names if missing_required_kwargs: return ValidationResult( status=ValidationStatus.INVALID, confidence=0.8, message=f"Missing required keyword arguments: {list(missing_required_kwargs)}", ) return ValidationResult( status=ValidationStatus.VALID, confidence=0.9, message="Parameters are valid", ) def calculate_overall_confidence( result: ScriptValidationResult, knowledge_graph_modules: set[str], ) -> float: """ Calculate overall confidence score for the validation (knowledge graph items only). Computes the average confidence of validations for items originating from the knowledge graph. Returns perfect confidence (1.0) if no knowledge graph items were validated. Args: result: ScriptValidationResult containing all validation results. knowledge_graph_modules: Set of module names in the knowledge graph. Returns: Float between 0.0 and 1.0 representing overall confidence score. 
""" kg_validations: list[float] = [] # Only count validations from knowledge graph imports for val in result.import_validations: if val.validation.details.get("in_knowledge_graph", False): kg_validations.append(val.validation.confidence) # Only count validations from knowledge graph classes for val in result.class_validations: class_name = val.class_instantiation.full_class_name or val.class_instantiation.class_name if is_from_knowledge_graph(class_name, knowledge_graph_modules): kg_validations.append(val.validation.confidence) # Only count validations from knowledge graph methods for val in result.method_validations: if val.method_call.object_type and is_from_knowledge_graph(val.method_call.object_type, knowledge_graph_modules): kg_validations.append(val.validation.confidence) # Only count validations from knowledge graph attributes for val in result.attribute_validations: if val.attribute_access.object_type and is_from_knowledge_graph(val.attribute_access.object_type, knowledge_graph_modules): kg_validations.append(val.validation.confidence) # Only count validations from knowledge graph functions for val in result.function_validations: if val.function_call.full_name and is_from_knowledge_graph(val.function_call.full_name, knowledge_graph_modules): kg_validations.append(val.validation.confidence) if not kg_validations: return 1.0 # No knowledge graph items to validate = perfect confidence return sum(kg_validations) / len(kg_validations) def is_from_knowledge_graph( class_type: str, knowledge_graph_modules: set[str], ) -> bool: """ Check if a class type comes from a module in the knowledge graph. For dotted names like "pydantic_ai.Agent", extracts and checks the base module. For simple names, checks for exact matches. Args: class_type: The class name or dotted class path to check. knowledge_graph_modules: Set of module names in the knowledge graph. Returns: True if the class type is from a knowledge graph module, False otherwise. 
""" if not class_type: return False # For dotted names like "pydantic_ai.Agent" or "pydantic_ai.StreamedRunResult", check the base module if "." in class_type: base_module = class_type.split(".")[0] # Exact match only - "pydantic" should not match "pydantic_ai" return base_module in knowledge_graph_modules # For simple names, check if any knowledge graph module matches exactly # Don't use substring matching to avoid "pydantic" matching "pydantic_ai" return class_type in knowledge_graph_modules def detect_hallucinations( result: ScriptValidationResult, knowledge_graph_modules: set[str], ) -> list[dict[str, Any]]: """ Detect and categorize hallucinations in the validation result. Identifies invalid methods, attributes, and parameters that only exist on classes from the knowledge graph to avoid false positives for external classes. Args: result: ScriptValidationResult containing all validation results. knowledge_graph_modules: Set of module names in the knowledge graph. Returns: List of dictionaries describing detected hallucinations, each containing: - type: Hallucination type (METHOD_NOT_FOUND, ATTRIBUTE_NOT_FOUND, INVALID_PARAMETERS) - location: Line number where the hallucination was found - description: Human-readable description of the issue - suggestion: Optional suggested correction """ hallucinations = [] reported_items = set() # Track reported items to avoid duplicates # Check method calls (only for knowledge graph classes) for val in result.method_validations: if (val.validation.status == ValidationStatus.NOT_FOUND and val.method_call.object_type and is_from_knowledge_graph(val.method_call.object_type, knowledge_graph_modules)): # Create unique key to avoid duplicates key = (val.method_call.line_number, val.method_call.method_name, val.method_call.object_type) if key not in reported_items: reported_items.add(key) hallucinations.append({ "type": "METHOD_NOT_FOUND", "location": f"line {val.method_call.line_number}", "description": f"Method 
'{val.method_call.method_name}' not found on class '{val.method_call.object_type}'", "suggestion": val.validation.suggestions[0] if val.validation.suggestions else None, }) # Check attributes (only for knowledge graph classes) - but skip if already reported as method for val in result.attribute_validations: if (val.validation.status == ValidationStatus.NOT_FOUND and val.attribute_access.object_type and is_from_knowledge_graph(val.attribute_access.object_type, knowledge_graph_modules)): # Create unique key - if this was already reported as a method, skip it key = (val.attribute_access.line_number, val.attribute_access.attribute_name, val.attribute_access.object_type) if key not in reported_items: reported_items.add(key) hallucinations.append({ "type": "ATTRIBUTE_NOT_FOUND", "location": f"line {val.attribute_access.line_number}", "description": f"Attribute '{val.attribute_access.attribute_name}' not found on class '{val.attribute_access.object_type}'", }) # Check parameter issues (only for knowledge graph methods) for val in result.method_validations: if (val.parameter_validation and val.parameter_validation.status == ValidationStatus.INVALID and val.method_call.object_type and is_from_knowledge_graph(val.method_call.object_type, knowledge_graph_modules)): hallucinations.append({ "type": "INVALID_PARAMETERS", "location": f"line {val.method_call.line_number}", "description": f"Invalid parameters for method '{val.method_call.method_name}': {val.parameter_validation.message}", }) return hallucinations

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/AI-enthusiasts/crawl4ai-rag-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

utils.py•12.6 KiB