"""
Schema-Guided Reasoning (SGR) schemas for code search.
These schemas enforce a structured reasoning process:
0. Repo Context - understand repository structure, language, framework
1. Question Analysis - extract concepts from the question
2. Technical Terms - map concepts to actual code identifiers
3. Search Planning - create actionable search steps
4. Result Interpretation - analyze tool outputs
5. Final Answer - produce structured response
Using Pydantic models ensures that LLM outputs conform to the expected structure
via Structured Output / Constrained Decoding.
"""
from __future__ import annotations
from typing import List, Literal, Optional, Dict, Any, Union
from pydantic import BaseModel, Field, field_validator, model_validator
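# Illustrative usage (a minimal sketch, not executed here): each step's schema can be
# exported as a JSON Schema for a structured-output / constrained-decoding API, and the
# raw JSON reply validated back into the model. `call_llm_with_json_schema` below is a
# hypothetical helper, not part of this module.
#
#   schema = RepoContext.model_json_schema()            # Pydantic v2
#   raw = call_llm_with_json_schema(prompt, schema)     # hypothetical provider call
#   repo_ctx = RepoContext.model_validate_json(raw)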
# ============================================
# STEP 0: Repository Context Analysis
# ============================================
class RepoContext(BaseModel):
"""
Step 0: Analyze repository structure to understand context.
This step runs ONCE at the start to:
1. Identify programming language(s)
2. Detect framework/library
3. Understand project structure
4. Learn naming conventions
"""
primary_language: str = Field(
description=(
"Primary programming language of the repository. "
"Use exact language name as detected: Python, Go, JavaScript, TypeScript, "
"Java, C++, C#, Ruby, Rust, CodeQL, Kotlin, Scala, etc."
)
)
languages: Dict[str, int] = Field(
default_factory=dict,
description=(
"All detected languages with file counts. "
"Example: {'C++': 2335, 'Python': 1056, 'C': 8}. "
"This shows the full language distribution in the repository."
)
)
framework: Optional[str] = Field(
default=None,
description=(
"Main framework or library if detected. "
"Examples: 'Django', 'Flask', 'Gin', 'Express', 'React', 'Spring'"
)
)
project_type: Literal[
"web_framework", # Django, Flask, Gin, Express
"library", # Standalone library/SDK
"cli_tool", # Command-line application
"desktop_app", # Desktop application
"data_science", # ML/Data processing
"game_engine", # Game/Graphics engine
"other"
] = Field(
description="Type of project"
)
key_directories: List[str] = Field(
default_factory=list,
description=(
"Important directories discovered. "
"Examples: ['django/core/', 'django/db/', 'tree.go', 'context.go']"
)
)
file_patterns: List[str] = Field(
default_factory=list,
description=(
"Common file patterns in this repo. "
"Examples: ['*.py', '*.go', '*_test.go', 'test_*.py']"
)
)
naming_conventions: str = Field(
default="",
max_length=500,
description=(
"Observed naming conventions. "
"Examples: 'snake_case functions, PascalCase classes', "
"'Go style: exported functions start with uppercase'"
)
)
@field_validator('key_directories')
@classmethod
def validate_key_dirs(cls, v: List[str]) -> List[str]:
return v[:10] if v else []
@field_validator('file_patterns')
@classmethod
def validate_patterns(cls, v: List[str]) -> List[str]:
return v[:5] if v else ["*"]
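# Example RepoContext for a Django checkout (illustrative values only):
#
#   ctx = RepoContext(
#       primary_language="Python",
#       languages={"Python": 1056, "C": 8},
#       framework="Django",
#       project_type="web_framework",
#       key_directories=["django/core/", "django/db/"],
#       file_patterns=["*.py", "test_*.py"],
#       naming_conventions="snake_case functions, PascalCase classes",
#   )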
# ============================================
# STEP 1: Question Analysis (concepts only)
# ============================================
class QuestionAnalysis(BaseModel):
"""
Step 1: Analyze the user's question to extract HIGH-LEVEL concepts.
This step does NOT generate grep patterns yet - that's step 2.
Focus on understanding WHAT the user wants to find.
"""
concepts: List[str] = Field(
default_factory=list,
description=(
"High-level concepts the question is asking about. "
"Examples: 'lazy evaluation', 'middleware chain', 'route matching', "
"'context reuse', 'parallel rendering', 'memory persistence'. "
"Extract 3-8 concepts from the question."
)
)
user_problem: str = Field(
default="",
max_length=500,
description=(
"What problem is the user trying to solve? "
"Summarize in 1-2 sentences."
)
)
what_to_find: str = Field(
default="",
max_length=500,
description=(
"What specific code/implementation does the user need to find? "
"E.g., 'the code that handles middleware execution order'"
)
)
question_type: Literal[
"how_it_works", # Understanding implementation details
"where_is_it", # Finding specific code location
"why_behavior", # Explaining unexpected behavior
"how_to_use", # API/usage questions
"debugging", # Finding root cause of issues
] = Field(
description=(
"Type of question determines search strategy. "
"how_it_works: need to find implementation details. "
"where_is_it: need to locate specific code. "
"why_behavior: need to trace code flow to explain behavior. "
"how_to_use: need to find examples and API definitions. "
"debugging: need to find error handling and edge cases."
)
)
@field_validator('concepts')
@classmethod
def validate_concepts(cls, v: List[str]) -> List[str]:
"""Ensure 3-8 concepts."""
if not v:
return ["implementation"]
return v[:8]
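# Example QuestionAnalysis for "Why doesn't a QuerySet hit the database when created?"
# (illustrative values only):
#
#   analysis = QuestionAnalysis(
#       concepts=["lazy evaluation", "queryset caching", "database access"],
#       user_problem="QuerySets are not evaluated at construction time.",
#       what_to_find="the code that defers and caches QuerySet evaluation",
#       question_type="how_it_works",
#   )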
# ============================================
# STEP 2: Technical Terms Extraction
# ============================================
class TechnicalTerm(BaseModel):
"""A single technical term with context."""
term: str = Field(
description="Exact string to grep for in code"
)
term_type: Literal[
"class_name",
"function_name",
"method_name",
"variable_name",
"constant",
"file_name",
"pattern" # regex pattern
] = Field(
description="What type of identifier this is"
)
confidence: Literal["high", "medium", "low"] = Field(
default="medium",
description="How confident this term exists in the codebase"
)
reasoning: str = Field(
default="",
max_length=500,
description="Why this term was chosen"
)
class TechnicalTerms(BaseModel):
"""
Step 2: Map concepts to actual technical identifiers.
This step uses RepoContext to generate REAL code identifiers
based on the language, framework, and naming conventions.
Key insight: We know the repo (e.g., Django, Gin) so we can
suggest framework-specific class/function names.
"""
primary_terms: List[TechnicalTerm] = Field(
default_factory=list,
description=(
"Primary technical terms - most likely to find relevant code. "
"These should be UNIQUE identifiers specific to the concept. "
"For Django: QuerySet, _result_cache, BaseHandler. "
"For Gin: tree, node, sync.Pool, Context.Copy. "
"Provide 3-8 terms."
)
)
secondary_terms: List[TechnicalTerm] = Field(
default_factory=list,
description=(
"Secondary/backup terms if primary don't work. "
"More generic but still relevant. "
"Provide 2-5 terms."
)
)
likely_files: List[str] = Field(
default_factory=list,
description=(
"Specific files or directories to search. "
"Based on repo structure from Step 0. "
"Examples: 'django/core/handlers/', 'tree.go', 'context.go'"
)
)
search_patterns: List[str] = Field(
default_factory=list,
description=(
"Regex patterns combining multiple terms. "
"Examples: 'class.*Handler', 'def.*middleware', 'func.*Route'"
)
)
@field_validator('primary_terms')
@classmethod
def validate_primary(cls, v: List[TechnicalTerm]) -> List[TechnicalTerm]:
"""Ensure 3-8 primary terms."""
if not v:
return []
return v[:8]
@field_validator('secondary_terms')
@classmethod
def validate_secondary(cls, v: List[TechnicalTerm]) -> List[TechnicalTerm]:
"""Ensure 2-5 secondary terms."""
return v[:5] if v else []
@field_validator('likely_files')
@classmethod
def validate_files(cls, v: List[str]) -> List[str]:
"""Ensure 1-6 files."""
if not v:
return ["src/"]
return v[:6]
@field_validator('search_patterns')
@classmethod
def validate_patterns(cls, v: List[str]) -> List[str]:
"""Ensure 1-5 patterns."""
return v[:5] if v else []
def get_all_terms(self) -> List[str]:
"""Get all term strings for grep."""
terms = [t.term for t in self.primary_terms]
terms.extend([t.term for t in self.secondary_terms])
return terms
def get_high_confidence_terms(self) -> List[str]:
"""Get only high-confidence terms."""
return [t.term for t in self.primary_terms if t.confidence == "high"]
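# Example TechnicalTerms continuing the Django scenario above (illustrative values only);
# get_all_terms() / get_high_confidence_terms() feed the grep patterns of the next step:
#
#   terms = TechnicalTerms(
#       primary_terms=[
#           TechnicalTerm(term="_result_cache", term_type="variable_name",
#                         confidence="high", reasoning="QuerySet caches results here"),
#           TechnicalTerm(term="_fetch_all", term_type="method_name"),
#       ],
#       likely_files=["django/db/models/query.py"],
#   )
#   terms.get_high_confidence_terms()   # -> ["_result_cache"]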
# ============================================
# STEP 3: Search Planning - Typed Actions
# ============================================
# --- Base class for all actions ---
class BaseSearchAction(BaseModel):
"""Base class for search actions with common fields."""
purpose: str = Field(
default="Search for relevant code",
max_length=500,
description="Why we're executing this action - what we hope to find"
)
expected_result: str = Field(
default="Find relevant code",
max_length=500,
description="What we expect to find if successful"
)
# --- Typed Action Classes with REQUIRED params ---
class GrepAction(BaseSearchAction):
"""
Grep action - search file contents using regex.
Pattern is REQUIRED.
"""
tool: Literal["grep"] = Field(
default="grep",
description="Tool type identifier"
)
pattern: str = Field(
min_length=1,
max_length=500,
description=(
"REQUIRED: Regex pattern to search for. "
"Examples: 'QuerySet', 'class.*Handler', 'def.*process', 'func.*Route'"
)
)
path: Optional[str] = Field(
default=None,
description="Optional: Directory to search in (e.g., 'src/', 'django/core/')"
)
glob: Optional[str] = Field(
default=None,
description="Optional: File glob pattern (e.g., '*.py', '*.go')"
)
case_insensitive: bool = Field(
default=False,
description="Optional: Case-insensitive search"
)
context_lines: int = Field(
default=3,
ge=0,
le=10,
description="Optional: Number of context lines around matches"
)
def to_params(self) -> Dict[str, Any]:
"""Convert to params dict for tool execution."""
params = {"pattern": self.pattern}
if self.path:
params["path"] = self.path
if self.glob:
params["glob"] = self.glob
if self.case_insensitive:
params["case_insensitive"] = True
if self.context_lines != 3:
params["context_lines"] = self.context_lines
return params
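# Example: to_params() only emits non-default optionals (illustrative):
#
#   GrepAction(pattern="class.*Handler", path="django/core/").to_params()
#   # -> {"pattern": "class.*Handler", "path": "django/core/"}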
class ReadFileAction(BaseSearchAction):
"""
Read file action - read contents of a specific file.
File path is REQUIRED.
"""
tool: Literal["read_file"] = Field(
default="read_file",
description="Tool type identifier"
)
file_path: str = Field(
min_length=1,
max_length=500,
description=(
"REQUIRED: Path to the file to read. "
"Examples: 'django/db/models/query.py', 'tree.go', 'src/core/handler.ts'"
)
)
start_line: Optional[int] = Field(
default=None,
ge=1,
description="Optional: Start reading from this line"
)
end_line: Optional[int] = Field(
default=None,
ge=1,
description="Optional: Stop reading at this line"
)
def to_params(self) -> Dict[str, Any]:
"""Convert to params dict for tool execution."""
params = {"file_path": self.file_path}
if self.start_line:
params["start_line"] = self.start_line
if self.end_line:
params["end_line"] = self.end_line
return params
class ListDirAction(BaseSearchAction):
"""
List directory action - explore directory structure.
Path defaults to current directory.
"""
tool: Literal["list_dir"] = Field(
default="list_dir",
description="Tool type identifier"
)
path: str = Field(
default=".",
max_length=500,
description="Directory path to list (default: current directory)"
)
max_depth: int = Field(
default=2,
ge=1,
le=5,
description="Maximum depth to traverse"
)
def to_params(self) -> Dict[str, Any]:
"""Convert to params dict for tool execution."""
return {"path": self.path, "max_depth": self.max_depth}
class GlobSearchAction(BaseSearchAction):
"""
Glob search action - find files by name pattern.
Pattern is REQUIRED.
"""
tool: Literal["glob_search"] = Field(
default="glob_search",
description="Tool type identifier"
)
pattern: str = Field(
min_length=1,
max_length=500,
description=(
"REQUIRED: Glob pattern to match files. "
"Examples: '*_service.py', '*Handler.java', '*_test.go', '*.qll'"
)
)
path: Optional[str] = Field(
default=None,
description="Optional: Directory to search in"
)
def to_params(self) -> Dict[str, Any]:
"""Convert to params dict for tool execution."""
params = {"pattern": self.pattern}
if self.path:
params["path"] = self.path
return params
# --- Union of typed actions (discriminated by the `tool` literal) ---
TypedSearchAction = Union[GrepAction, ReadFileAction, ListDirAction, GlobSearchAction]
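# The `tool` Literal on each action can serve as the discriminator when validating raw
# dicts into the correct action class. Sketch using Pydantic v2's TypeAdapter (assumes
# `Annotated` from typing and `TypeAdapter` from pydantic are imported where used):
#
#   adapter = TypeAdapter(Annotated[TypedSearchAction, Field(discriminator="tool")])
#   action = adapter.validate_python({"tool": "grep", "pattern": "QuerySet"})
#   isinstance(action, GrepAction)   # True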
# --- Legacy SearchAction for backward compatibility ---
class SearchAction(BaseModel):
"""
Legacy search action - use TypedSearchAction for new code.
Kept for backward compatibility.
"""
tool: Literal["grep", "read_file", "list_dir", "glob_search"] = Field(
description="Tool to use for this action"
)
params: Dict[str, Any] = Field(
default_factory=dict,
description=(
"Parameters for the tool. "
"grep: {pattern, path?, glob?, case_insensitive?, context_lines?}. "
"read_file: {file_path, start_line?, end_line?}. "
"list_dir: {path, max_depth?}. "
"glob_search: {pattern, path?}."
)
)
purpose: str = Field(
default="Search for relevant code",
max_length=500,
description="Why we're executing this action - what we hope to find"
)
expected_result: str = Field(
default="Find relevant code",
max_length=500,
description="What we expect to find if successful"
)
@classmethod
def from_typed(cls, action: TypedSearchAction) -> "SearchAction":
"""Convert typed action to legacy SearchAction."""
return cls(
tool=action.tool,
params=action.to_params(),
purpose=action.purpose,
expected_result=action.expected_result,
)
def to_typed(self) -> Optional[TypedSearchAction]:
"""Convert legacy action to typed action, returns None if invalid."""
try:
if self.tool == "grep":
pattern = self.params.get("pattern", "")
if not pattern:
return None
return GrepAction(
pattern=pattern,
path=self.params.get("path"),
glob=self.params.get("glob"),
case_insensitive=self.params.get("case_insensitive", False),
context_lines=self.params.get("context_lines", 3),
purpose=self.purpose,
expected_result=self.expected_result,
)
elif self.tool == "read_file":
file_path = self.params.get("file_path", "")
if not file_path:
return None
return ReadFileAction(
file_path=file_path,
start_line=self.params.get("start_line"),
end_line=self.params.get("end_line"),
purpose=self.purpose,
expected_result=self.expected_result,
)
elif self.tool == "list_dir":
return ListDirAction(
path=self.params.get("path", "."),
max_depth=self.params.get("max_depth", 2),
purpose=self.purpose,
expected_result=self.expected_result,
)
elif self.tool == "glob_search":
pattern = self.params.get("pattern", "")
if not pattern:
return None
return GlobSearchAction(
pattern=pattern,
path=self.params.get("path"),
purpose=self.purpose,
expected_result=self.expected_result,
)
except Exception:
return None
return None
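# Example round-trip between the legacy and typed shapes (illustrative):
#
#   legacy = SearchAction(tool="grep", params={"pattern": "sync.Pool"})
#   typed = legacy.to_typed()              # GrepAction(pattern="sync.Pool", ...)
#   SearchAction.from_typed(typed)         # back to the legacy shape
#
#   SearchAction(tool="grep", params={}).to_typed()   # -> None (missing required pattern)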
# --- New Typed Search Plan ---
class TypedSearchPlan(BaseModel):
"""
Search plan with TYPED actions - ensures all required params are present.
Uses discriminated union to enforce correct params for each tool type.
This is the PREFERRED schema for LLM structured output.
"""
strategy: Literal[
"broad_to_narrow",
"specific_file",
"structure_first",
"keyword_hunt",
"trace_flow",
] = Field(
description=(
"Search strategy to use. "
"keyword_hunt: search for unique technical terms directly (MOST COMMON). "
"broad_to_narrow: start with wide patterns, narrow based on results. "
"specific_file: already know the file, read it directly. "
"structure_first: explore directories first. "
"trace_flow: follow execution path from known entry point."
)
)
grep_actions: List[GrepAction] = Field(
default_factory=list,
description="Grep search actions - each MUST have a pattern"
)
read_file_actions: List[ReadFileAction] = Field(
default_factory=list,
description="File read actions - each MUST have a file_path"
)
list_dir_actions: List[ListDirAction] = Field(
default_factory=list,
description="Directory listing actions"
)
glob_search_actions: List[GlobSearchAction] = Field(
default_factory=list,
description="Glob search actions - each MUST have a pattern"
)
strategy_reasoning: str = Field(
default="",
max_length=1000,
description="Why this strategy was chosen"
)
@field_validator('grep_actions')
@classmethod
def validate_grep(cls, v: List[GrepAction]) -> List[GrepAction]:
"""Limit grep actions."""
return v[:5] if v else []
@field_validator('read_file_actions')
@classmethod
def validate_read(cls, v: List[ReadFileAction]) -> List[ReadFileAction]:
"""Limit read actions."""
return v[:3] if v else []
@field_validator('list_dir_actions')
@classmethod
def validate_list(cls, v: List[ListDirAction]) -> List[ListDirAction]:
"""Limit list actions."""
return v[:2] if v else []
@field_validator('glob_search_actions')
@classmethod
def validate_glob(cls, v: List[GlobSearchAction]) -> List[GlobSearchAction]:
"""Limit glob actions."""
return v[:3] if v else []
def get_all_actions(self) -> List[SearchAction]:
"""Get all actions as legacy SearchAction list for execution."""
actions = []
for action in self.grep_actions:
actions.append(SearchAction.from_typed(action))
for action in self.read_file_actions:
actions.append(SearchAction.from_typed(action))
for action in self.list_dir_actions:
actions.append(SearchAction.from_typed(action))
for action in self.glob_search_actions:
actions.append(SearchAction.from_typed(action))
return actions
def get_total_actions(self) -> int:
"""Get total number of actions."""
return (
len(self.grep_actions) +
len(self.read_file_actions) +
len(self.list_dir_actions) +
len(self.glob_search_actions)
)
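# Example TypedSearchPlan (illustrative values only); get_all_actions() flattens the
# per-tool lists into legacy SearchActions in grep -> read_file -> list_dir -> glob order:
#
#   plan = TypedSearchPlan(
#       strategy="keyword_hunt",
#       grep_actions=[GrepAction(pattern="_result_cache", glob="*.py")],
#       read_file_actions=[ReadFileAction(file_path="django/db/models/query.py")],
#   )
#   plan.get_total_actions()   # -> 2
#   plan.get_all_actions()     # -> two legacy SearchAction objects ready for execution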
# --- Legacy SearchPlan for backward compatibility ---
class SearchPlan(BaseModel):
"""
Legacy search plan - use TypedSearchPlan for new code.
Kept for backward compatibility.
"""
strategy: Literal[
"broad_to_narrow",
"specific_file",
"structure_first",
"keyword_hunt",
"trace_flow",
] = Field(
description=(
"Search strategy to use. "
"broad_to_narrow: start with wide patterns, narrow based on results. "
"specific_file: already know the file, search within it. "
"structure_first: explore directories to understand project layout. "
"keyword_hunt: search for unique technical terms directly. "
"trace_flow: follow execution path from known entry point."
)
)
actions: List[SearchAction] = Field(
default_factory=list,
description="First 1-5 search actions to execute"
)
strategy_reasoning: str = Field(
default="",
max_length=1000,
description="Why this strategy was chosen for this question"
)
@field_validator('actions')
@classmethod
def validate_actions(cls, v: List[SearchAction]) -> List[SearchAction]:
"""Ensure 1-5 actions."""
if not v:
# Provide a default grep action
return [SearchAction(
tool="grep",
params={"pattern": ".*"},
purpose="Initial broad search",
expected_result="Find relevant files",
)]
return v[:5]
@classmethod
def from_typed(cls, typed_plan: TypedSearchPlan) -> "SearchPlan":
"""Convert TypedSearchPlan to legacy SearchPlan."""
return cls(
strategy=typed_plan.strategy,
actions=typed_plan.get_all_actions(),
strategy_reasoning=typed_plan.strategy_reasoning,
)
# ============================================
# STEP 4: Result Interpretation
# ============================================
class SearchResultInterpretation(BaseModel):
"""
Interpretation of a single search result.
Forces LLM to analyze what was found and decide next steps.
"""
action_executed: str = Field(
default="",
max_length=500,
description="Brief description of what action was executed"
)
found_relevant: bool = Field(
default=False,
description="Whether anything relevant to the question was found"
)
key_findings: List[str] = Field(
default_factory=list,
description=(
"Key findings from this search (max 10). "
"Include: file paths found, function/class names discovered, "
"important code patterns observed."
)
)
new_leads: List[str] = Field(
default_factory=list,
description=(
"New search directions discovered from this result (max 5). "
"E.g., 'found reference to BaseHandler, should search for its definition'"
)
)
confidence: Literal["low", "medium", "high"] = Field(
default="low",
description=(
"Confidence that we're on the right track. "
"low: uncertain, might be wrong direction. "
"medium: promising but need more confirmation. "
"high: confident this is relevant code."
)
)
next_action_needed: bool = Field(
default=True,
description="Whether we need more search actions to answer the question"
)
@field_validator('key_findings')
@classmethod
def validate_findings(cls, v: List[str]) -> List[str]:
"""Limit to 10 findings."""
return v[:10] if v else []
@field_validator('new_leads')
@classmethod
def validate_leads(cls, v: List[str]) -> List[str]:
"""Limit to 5 leads."""
return v[:5] if v else []
# ============================================
# STEP 5: Final Answer
# ============================================
class CodeLocation(BaseModel):
"""A single code location found."""
file_path: str = Field(
description="Path to the file relative to repository root"
)
relevant_elements: List[str] = Field(
default_factory=list,
description="Classes, functions, methods found in this file that are relevant (max 10)"
)
line_range: Optional[str] = Field(
default=None,
description="Approximate line range if known (e.g., '100-150')"
)
relevance: str = Field(
default="",
max_length=500,
description="Why this code is relevant to answering the question"
)
@field_validator('relevant_elements')
@classmethod
def validate_elements(cls, v: List[str]) -> List[str]:
"""Ensure 1-10 elements."""
if not v:
return ["(unknown)"]
return v[:10]
class FinalAnswer(BaseModel):
"""
Structured final answer.
Contains summary, code locations, reasoning chain, and self-assessment.
"""
summary: str = Field(
default="",
max_length=1000,
description="Clear, concise answer to the user's question"
)
code_locations: List[CodeLocation] = Field(
default_factory=list,
description="Relevant code locations found (max 10)"
)
reasoning_chain: List[str] = Field(
default_factory=list,
description=(
"Steps taken to arrive at this answer (max 10). "
"E.g., ['Searched for QuerySet class', 'Found _result_cache in query.py', "
"'Traced __iter__ method which calls _fetch_all']"
)
)
confidence: Literal["low", "medium", "high"] = Field(
default="low",
description="Overall confidence in the answer"
)
potential_gaps: List[str] = Field(
default_factory=list,
description="What we might have missed or should verify further (max 5)"
)
@field_validator('code_locations')
@classmethod
def validate_locations(cls, v: List[CodeLocation]) -> List[CodeLocation]:
"""Ensure 1-10 locations."""
if not v:
return [CodeLocation(
file_path="(not found)",
relevant_elements=["(unknown)"],
relevance="No relevant code found",
)]
return v[:10]
@field_validator('reasoning_chain')
@classmethod
def validate_chain(cls, v: List[str]) -> List[str]:
"""Ensure 1-10 steps."""
if not v:
return ["Searched codebase"]
return v[:10]
@field_validator('potential_gaps')
@classmethod
def validate_gaps(cls, v: List[str]) -> List[str]:
"""Limit to 5 gaps."""
return v[:5] if v else []
# ============================================
# Iteration Tracking
# ============================================
class SearchIteration(BaseModel):
"""One complete search iteration."""
iteration_number: int = Field(ge=1, le=10)
action: SearchAction = Field(
description="Action that was executed"
)
tool_result_summary: Optional[str] = Field(
default=None,
max_length=2500,
description="Summary of tool output (filled after execution)"
)
interpretation: Optional[SearchResultInterpretation] = Field(
default=None,
description="Interpretation of the result (filled after execution)"
)
next_step: Literal[
"continue_search", # Need more searches
"need_more_context", # Found something but need to read more
"ready_to_answer", # Have enough information to answer
] = Field(
default="continue_search",
description="What to do after this iteration"
)
# ============================================
# Full Process Schema
# ============================================
class CodeSearchProcess(BaseModel):
"""
Complete search process schema.
This is the top-level schema that encompasses the entire
Schema-Guided Reasoning process for code search.
"""
# Input
original_question: str = Field(
description="The original user question"
)
repository_path: str = Field(
description="Path to the repository being searched"
)
# Step 1: Question Analysis (mandatory)
question_analysis: QuestionAnalysis = Field(
description="Analysis of the question"
)
# Step 2: Initial Plan (mandatory)
initial_plan: SearchPlan = Field(
description="Initial search plan"
)
# Step 3: Search Iterations (0-10)
search_iterations: List[SearchIteration] = Field(
default_factory=list,
description="Executed search iterations with results (max 10)"
)
# Step 4: Final Answer (mandatory when complete)
final_answer: Optional[FinalAnswer] = Field(
default=None,
description="Final structured answer"
)
# Metadata
total_tool_calls: int = Field(
default=0,
ge=0,
description="Total number of tool calls made"
)
status: Literal["in_progress", "completed", "failed"] = Field(
default="in_progress",
description="Current status of the search process"
)
error: Optional[str] = Field(
default=None,
description="Error message if search failed"
)
@field_validator('search_iterations')
@classmethod
def validate_iterations(cls, v: List[SearchIteration]) -> List[SearchIteration]:
"""Limit to 10 iterations."""
return v[:10] if v else []
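# Example of the top-level process object part-way through a search (illustrative values
# only; search_iterations and final_answer are filled in as the loop runs):
#
#   process = CodeSearchProcess(
#       original_question="How does Gin reuse Context objects between requests?",
#       repository_path="/repos/gin",
#       question_analysis=QuestionAnalysis(
#           concepts=["context reuse", "object pooling"],
#           question_type="how_it_works",
#       ),
#       initial_plan=SearchPlan(
#           strategy="keyword_hunt",
#           actions=[SearchAction(tool="grep", params={"pattern": "sync.Pool"})],
#       ),
#   )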
# ============================================
# Tool Response Schemas (Pydantic models for typed tool outputs)
# ============================================
class GrepMatch(BaseModel):
"""Single grep match."""
file: str = ""
line: int = 0
content: str = ""
class GrepResult(BaseModel):
"""Result from grep tool."""
matches: List[GrepMatch] = Field(default_factory=list)
files: List[str] = Field(default_factory=list)
files_count: int = 0
total_matches: int = 0
truncated: bool = False
error: Optional[str] = None
class ReadFileResult(BaseModel):
"""Result from read_file tool."""
content: str = ""
total_lines: int = 0
read_lines: int = 0
truncated: bool = False
error: Optional[str] = None
class ListDirResult(BaseModel):
"""Result from list_dir tool."""
structure: str = ""
files_count: int = 0
dirs_count: int = 0
error: Optional[str] = None
class GlobSearchResult(BaseModel):
"""Result from glob_search tool."""
files: List[str] = Field(default_factory=list)
count: int = 0
truncated: bool = False
error: Optional[str] = None
# Union type for all tool results
ToolResult = Union[GrepResult, ReadFileResult, ListDirResult, GlobSearchResult]
# ============================================
# Next Action Decision
# ============================================
class NextActionDecision(BaseModel):
"""
Decision about what to do next in the search process.
All tool parameters are FLAT fields (not a Dict) because Gemini's structured output handles Dict[str, Any] poorly.
"""
should_continue: bool = Field(
default=False,
description="Whether to continue searching"
)
reason: str = Field(
default="",
max_length=500,
description="Reason for this decision"
)
ready_for_answer: bool = Field(
default=False,
description="Whether we have enough information to answer"
)
# ========================================
# Next action fields - ALL FLAT for Gemini
# ========================================
has_next_action: bool = Field(
default=False,
description="Whether there is a next action to execute"
)
next_action_tool: Optional[Literal["grep", "read_file", "list_dir", "glob_search"]] = Field(
default=None,
description="Tool to use: grep, read_file, list_dir, or glob_search"
)
# FLAT params for grep tool
grep_pattern: Optional[str] = Field(
default=None,
description="REQUIRED for grep: regex pattern to search, e.g. 'middleware', 'class.*Handler'"
)
grep_path: Optional[str] = Field(
default=None,
description="Optional for grep: directory to search in, e.g. 'src/core/'"
)
# FLAT params for read_file tool
file_path: Optional[str] = Field(
default=None,
description="REQUIRED for read_file: path to file, e.g. 'src/handlers/base.py'"
)
# FLAT params for list_dir tool
list_dir_path: Optional[str] = Field(
default=None,
description="REQUIRED for list_dir: directory path, e.g. 'src/core/'"
)
# FLAT params for glob_search tool
glob_pattern: Optional[str] = Field(
default=None,
description="REQUIRED for glob_search: glob pattern, e.g. '*.py', '*middleware*'"
)
glob_path: Optional[str] = Field(
default=None,
description="Optional for glob_search: directory to search in"
)
next_action_purpose: Optional[str] = Field(
default=None,
max_length=500,
description="Why this action is needed"
)
next_action_expected: Optional[str] = Field(
default=None,
max_length=500,
description="What we expect to find"
)
# Keep for backward compatibility but will be ignored
next_action_params: Optional[Dict[str, Any]] = Field(
default=None,
description="DEPRECATED - use flat fields above instead"
)
def get_next_action(self) -> Optional[SearchAction]:
"""Reconstruct SearchAction from flat fields, with validation."""
action, _ = self._get_next_action_with_fallback_info()
return action
def _get_next_action_with_fallback_info(self) -> tuple[Optional[SearchAction], bool]:
"""
Reconstruct SearchAction from FLAT fields (not Dict).
Returns:
Tuple of (action, used_fallback). used_fallback is True if params
came from deprecated next_action_params dict.
"""
if not self.has_next_action or not self.next_action_tool:
return None, False
params: Dict[str, Any] = {}
used_fallback = False
# Priority 1: Use NEW flat fields (preferred)
if self.next_action_tool == "grep":
if self.grep_pattern:
params["pattern"] = self.grep_pattern
if self.grep_path:
params["path"] = self.grep_path
elif self.next_action_tool == "read_file":
if self.file_path:
params["file_path"] = self.file_path
elif self.next_action_tool == "list_dir":
if self.list_dir_path:
params["path"] = self.list_dir_path
elif self.next_action_tool == "glob_search":
if self.glob_pattern:
params["pattern"] = self.glob_pattern
if self.glob_path:
params["path"] = self.glob_path
# Priority 2: Fallback to deprecated next_action_params dict
if not params and self.next_action_params:
params = self.next_action_params
used_fallback = True
# Validate required params for each tool
if self.next_action_tool == "grep":
if not params.get("pattern"):
return None, False # grep needs pattern
elif self.next_action_tool == "read_file":
if not params.get("file_path"):
return None, False # read_file needs file_path
elif self.next_action_tool == "glob_search":
if not params.get("pattern"):
return None, False # glob_search needs pattern
elif self.next_action_tool == "list_dir":
if not params.get("path"):
return None, False # list_dir needs path
action = SearchAction(
tool=self.next_action_tool,
params=params,
purpose=self.next_action_purpose or "Continue search",
expected_result=self.next_action_expected or "Find relevant code",
)
return action, used_fallback
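# Example: a "continue with grep" decision expressed through the flat fields
# (illustrative values only); get_next_action() reassembles the legacy SearchAction:
#
#   decision = NextActionDecision(
#       should_continue=True,
#       reason="Found a reference to BaseHandler; need its definition",
#       has_next_action=True,
#       next_action_tool="grep",
#       grep_pattern="class BaseHandler",
#       grep_path="django/core/handlers/",
#       next_action_purpose="Locate the BaseHandler definition",
#   )
#   decision.get_next_action()   # -> SearchAction(tool="grep", params={"pattern": ..., "path": ...})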