"""
Semantic service for vector/text search operations
"""
import logging
from typing import Dict, List, Optional, Any
from repositories.vector_repository import VectorRepository
from repositories.embedding_repository import EmbeddingRepository
from shared.models import SemanticSearchResult
from shared.exceptions import EmbeddingError
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class SemanticService:
    """Service for semantic search operations.

    Coordinates between the vector repository (which executes searches) and
    the embedding repository (which turns a query into an embedding).
    """

    def __init__(self, vector_repo: VectorRepository, embedding_config=None):
        """Wire the service to its repositories.

        Args:
            vector_repo: Repository used to run searches. Its ``engine``
                attribute is also used to construct the embedding repository.
            embedding_config: Optional embedding configuration; it is only
                stored on the instance here.
        """
        self.vector_repo = vector_repo
        self.embedding_repo = EmbeddingRepository(vector_repo.engine)
        self.embedding_config = embedding_config

    def search(
        self,
        query: str,
        limit: int = 10,
        filters: Optional[Dict[str, Any]] = None
    ) -> List[SemanticSearchResult]:
        """Run a semantic search, falling back to text search on failure.

        Args:
            query: Search query.
            limit: Maximum number of results.
            filters: Optional mapping; the keys ``table_filter`` and
                ``fk_filter`` are read from it, and a missing key or a
                falsy ``filters`` yields ``None`` for that filter.

        Returns:
            Whatever ``vector_repo.semantic_search_with_fallback`` returns,
            or the result of ``vector_repo.text_search_fallback`` when
            embedding generation or the semantic search raises.

        Raises:
            AttributeError: If ``filters`` is truthy but has no ``get``
                method. This is raised before any search is attempted.
        """
        # Resolve the filters BEFORE the try block: the except handler below
        # needs them as well, so they must be bound even when the very first
        # statement inside the try raises. Malformed ``filters`` therefore
        # fails here with its own error instead of an UnboundLocalError
        # raised from inside the handler.
        table_filter = filters.get('table_filter') if filters else None
        fk_filter = filters.get('fk_filter') if filters else None

        try:
            # Try to generate an embedding for the query.
            query_embedding = self.embedding_repo.generate_embedding(query)

            # Use the vector repository's semantic search with fallback.
            return self.vector_repo.semantic_search_with_fallback(
                question=query,
                query_embedding=query_embedding,
                table_filter=table_filter,
                fk_filter=fk_filter,
                limit=limit
            )
        except Exception as e:
            # Deliberate best-effort boundary: any failure above is logged
            # and answered with a pure text search instead.
            logger.error("Semantic search failed: %s", e)
            return self.vector_repo.text_search_fallback(
                question=query,
                table_filter=table_filter,
                fk_filter=fk_filter,
                limit=limit
            )

    def is_vector_search_available(self) -> bool:
        """Return what the vector repository reports via ``has_vector_extension()``."""
        return self.vector_repo.has_vector_extension()

    def get_search_stats(self) -> Dict[str, Any]:
        """Collect statistics about the search system.

        Returns:
            On success, a dict with the keys ``vector_search_available``,
            ``vector_stats`` and ``embedding_stats``. If any of the
            underlying calls raises, a dict with
            ``vector_search_available`` set to ``False`` and ``error`` set
            to the exception text.
        """
        try:
            vector_stats = self.vector_repo.get_embedding_stats()
            embedding_stats = self.embedding_repo.get_embedding_stats()
            # The availability check stays inside the try so that a failure
            # there is reported through the error dict as well.
            return {
                'vector_search_available': self.is_vector_search_available(),
                'vector_stats': vector_stats,
                'embedding_stats': embedding_stats
            }
        except Exception as e:
            logger.error("Could not get search stats: %s", e)
            return {
                'vector_search_available': False,
                'error': str(e)
            }

    def extract_search_terms(self, query: str) -> List[str]:
        """Extract search terms from a query.

        Delegates to the vector repository's ``_extract_search_terms``
        helper and returns its result unchanged.
        """
        return self.vector_repo._extract_search_terms(query)