"""
Core semantic search functionality.
This module provides the main entry point for semantic search operations.
It uses a factory pattern to support multiple searcher implementations.
"""
from __future__ import annotations
import os
from enum import Enum
from typing import Dict, List, Optional, Type
from .searchers.base import BaseSearcher, SearchResult
class SearcherType(str, Enum):
    """Identifiers for the searcher implementations this module can build.

    Every member is also a ``str``, so it compares equal to its value and can
    be looked up from it, e.g. ``SearcherType("sgr")``. Iterating the enum
    yields members in the order they are declared below.
    """

    RIPGREP_CLAUDE = "ripgrep_claude"

    # --- Agent searchers ---
    AGENT = "agent"  # default agent (Claude)
    AGENT_CLAUDE = "agent_claude"
    AGENT_GEMINI = "agent_gemini"  # default Gemini agent (flash-lite)
    AGENT_GEMINI_FLASH_LITE = "agent_gemini_flash_lite"
    AGENT_GEMINI_FLASH = "agent_gemini_flash"

    # --- SGR (Schema-Guided Reasoning) searchers ---
    # Generic default; despite being listed ahead of the OpenAI variants it
    # resolves to Gemini flash-lite.
    SGR = "sgr"

    # SGR on OpenAI models
    SGR_GPT4O = "sgr_gpt4o"
    SGR_GPT4O_MINI = "sgr_gpt4o_mini"

    # SGR on Gemini models
    SGR_GEMINI = "sgr_gemini"  # default Gemini SGR (flash-lite)
    SGR_GEMINI_FLASH_LITE = "sgr_gemini_flash_lite"
    SGR_GEMINI_FLASH = "sgr_gemini_flash"
    SGR_GEMINI_PRO = "sgr_gemini_pro"
# Registry of searcher implementations: maps each SearcherType to the class
# that implements it. Entries are written by register_searcher().
_SEARCHER_REGISTRY: Dict[SearcherType, Type[BaseSearcher]] = {}
# Cached searcher instances, keyed by SearcherType. get_searcher() stores the
# instance it creates here and returns it on later calls unless force_new is set.
_SEARCHER_INSTANCES: Dict[SearcherType, BaseSearcher] = {}
def register_searcher(
    searcher_type: SearcherType,
    searcher_class: Type[BaseSearcher],
) -> None:
    """
    Associate a searcher class with a searcher type in the module registry.

    Any class previously registered under the same type is replaced. Only the
    registry is touched; already-cached instances are left as they are.

    Args:
        searcher_type: Registry key to register the class under.
        searcher_class: Class that get_searcher() will instantiate for this type.
    """
    _SEARCHER_REGISTRY.update({searcher_type: searcher_class})
def get_searcher(
    searcher_type: Optional[SearcherType] = None,
    force_new: bool = False,
) -> BaseSearcher:
    """
    Get a searcher instance, creating and caching it on first use.

    Args:
        searcher_type: Type of searcher to use. If None, the SEARCHER_TYPE
            env var is consulted; when it is unset or does not name a valid
            SearcherType, SGR_GEMINI_FLASH_LITE is used (an invalid value is
            logged as a warning).
        force_new: If True, creates a new instance instead of using the
            cached one. The new instance replaces the cached one.

    Returns:
        BaseSearcher instance for the resolved searcher type.

    Raises:
        ValueError: If no implementation is registered for the resolved type
            even after attempting to lazy-load it.
    """
    # Determine searcher type
    if searcher_type is None:
        default_type = SearcherType.SGR_GEMINI_FLASH_LITE
        type_str = os.getenv("SEARCHER_TYPE", default_type.value)
        try:
            searcher_type = SearcherType(type_str)
        except ValueError:
            # Keep the best-effort fallback, but make the misconfiguration
            # visible instead of silently running a different searcher.
            import logging

            logging.getLogger(__name__).warning(
                "Invalid SEARCHER_TYPE %r; falling back to %r",
                type_str,
                default_type.value,
            )
            searcher_type = default_type

    # Check cache
    if not force_new and searcher_type in _SEARCHER_INSTANCES:
        return _SEARCHER_INSTANCES[searcher_type]

    # Lazy load searcher classes to avoid import errors if deps missing
    if searcher_type not in _SEARCHER_REGISTRY:
        _load_searcher(searcher_type)

    # _load_searcher registers nothing for types it does not recognise, so
    # the registry has to be re-checked before instantiating.
    if searcher_type not in _SEARCHER_REGISTRY:
        raise ValueError(f"Unknown searcher type: {searcher_type}")

    searcher_class = _SEARCHER_REGISTRY[searcher_type]
    instance = searcher_class()

    # Cache instance (also overwrites the old one when force_new is True)
    _SEARCHER_INSTANCES[searcher_type] = instance
    return instance
def _load_searcher(searcher_type: SearcherType) -> None:
    """
    Import and register the implementation behind ``searcher_type``.

    Imports happen inside the matching branch only, so a searcher module is
    imported just when one of its types is requested. Types that share an
    implementation are registered together. If ``searcher_type`` matches no
    branch, nothing is registered and the function returns normally.
    """
    if searcher_type == SearcherType.RIPGREP_CLAUDE:
        from .searchers.ripgrep_claude import RipgrepClaudeSearcher

        register_searcher(SearcherType.RIPGREP_CLAUDE, RipgrepClaudeSearcher)
        return

    # Agent searchers
    claude_agent_types = (SearcherType.AGENT, SearcherType.AGENT_CLAUDE)
    if searcher_type in claude_agent_types:
        from .searchers.agent_searcher import ClaudeAgentSearcher

        for alias in claude_agent_types:
            register_searcher(alias, ClaudeAgentSearcher)
        return

    gemini_lite_agent_types = (
        SearcherType.AGENT_GEMINI,
        SearcherType.AGENT_GEMINI_FLASH_LITE,
    )
    if searcher_type in gemini_lite_agent_types:
        from .searchers.agent_searcher import GeminiFlashLiteSearcher

        for alias in gemini_lite_agent_types:
            register_searcher(alias, GeminiFlashLiteSearcher)
        return

    if searcher_type == SearcherType.AGENT_GEMINI_FLASH:
        from .searchers.agent_searcher import GeminiFlashSearcher

        register_searcher(SearcherType.AGENT_GEMINI_FLASH, GeminiFlashSearcher)
        return

    # SGR (Schema-Guided Reasoning) searchers - OpenAI
    if searcher_type == SearcherType.SGR_GPT4O:
        from .searchers.sgr_searcher import SGRSearcherGPT4o

        register_searcher(SearcherType.SGR_GPT4O, SGRSearcherGPT4o)
        return

    if searcher_type == SearcherType.SGR_GPT4O_MINI:
        from .searchers.sgr_searcher import SGRSearcherGPT4oMini

        register_searcher(SearcherType.SGR_GPT4O_MINI, SGRSearcherGPT4oMini)
        return

    # SGR (Schema-Guided Reasoning) searchers - Gemini (default)
    gemini_lite_sgr_types = (
        SearcherType.SGR,
        SearcherType.SGR_GEMINI,
        SearcherType.SGR_GEMINI_FLASH_LITE,
    )
    if searcher_type in gemini_lite_sgr_types:
        from .searchers.sgr_searcher import SGRSearcherGeminiFlashLite

        for alias in gemini_lite_sgr_types:
            register_searcher(alias, SGRSearcherGeminiFlashLite)
        return

    if searcher_type == SearcherType.SGR_GEMINI_FLASH:
        from .searchers.sgr_searcher import SGRSearcherGeminiFlash

        register_searcher(SearcherType.SGR_GEMINI_FLASH, SGRSearcherGeminiFlash)
        return

    if searcher_type == SearcherType.SGR_GEMINI_PRO:
        from .searchers.sgr_searcher import SGRSearcherGeminiPro

        register_searcher(SearcherType.SGR_GEMINI_PRO, SGRSearcherGeminiPro)
def run_semantic_search(
    query: str,
    repo_path: Optional[str] = None,
    path: Optional[str] = None,
    searcher_type: Optional[SearcherType] = None,
) -> Dict[str, List[Dict[str, str]]]:
    """
    Run a semantic search and return the result as a plain dict.

    Resolves which repository to search, obtains the searcher via
    get_searcher(), runs the search and converts the result with its
    ``to_dict()`` method.

    Args:
        query: Natural language description of what to find
        repo_path: Path to repository. If None, uses REPO_PATH env var or cwd.
        path: Optional subdirectory to limit search scope
        searcher_type: Optional searcher type override; None lets
            get_searcher() pick the configured/default type.

    Returns:
        Whatever the search result's ``to_dict()`` produces; per the declared
        return type, a dict with an "items" list whose entries carry
        "file_path" and "content".
    """
    # Only fall back to the environment / working directory when the caller
    # gave no repository at all.
    target_repo = (
        repo_path
        if repo_path is not None
        else os.getenv("REPO_PATH", os.getcwd())
    )

    active_searcher = get_searcher(searcher_type)
    outcome = active_searcher.search(query=query, repo_path=target_repo, path=path)
    return outcome.to_dict()
def list_available_searchers() -> List[str]:
    """Return the string value of every SearcherType member, in declaration order."""
    return [member.value for member in SearcherType]
def get_current_searcher_info() -> Dict[str, str]:
    """
    Get information about the currently configured searcher.

    The searcher type is resolved the same way get_searcher() resolves it
    when called without arguments: the SEARCHER_TYPE env var is used, and an
    unset or invalid value falls back to SGR_GEMINI_FLASH_LITE. The reported
    "type" is this effective type, so it always matches the searcher whose
    "name" is returned.

    Returns:
        ``{"type": ..., "name": ...}`` on success, or
        ``{"type": ..., "error": ...}`` if the searcher could not be obtained.
    """
    default_type = SearcherType.SGR_GEMINI_FLASH_LITE
    type_str = os.getenv("SEARCHER_TYPE", default_type.value)
    try:
        effective_type = SearcherType(type_str)
    except ValueError:
        # Same fallback as get_searcher(); reporting the raw env string here
        # would name a type that is not the one actually in use.
        effective_type = default_type

    try:
        searcher = get_searcher(effective_type)
        return {
            "type": effective_type.value,
            "name": searcher.name,
        }
    except Exception as e:
        # Informational helper: report the failure instead of raising.
        return {
            "type": effective_type.value,
            "error": str(e),
        }