"""Search engine for process documents.
Implements in-memory search with relevance ranking per ADR-007.
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from sso_mcp_server import get_logger
if TYPE_CHECKING:
from sso_mcp_server.processes.service import ProcessService
# Logger for this module, obtained from get_logger under the name "process_search".
_logger = get_logger("process_search")
# Relevance scoring weights (per design.md).
# Name and description each contribute their points once when the query occurs
# in that field; content contributes its points per occurrence (SearchEngine
# caps the counted occurrences at 5 before normalising the total).
NAME_MATCH_POINTS = 100
DESCRIPTION_MATCH_POINTS = 50
CONTENT_MATCH_POINTS = 10
# Maximum number of results returned by a single search (FR-012a).
MAX_RESULTS = 50
# Number of characters of surrounding content kept on each side of the first
# match when building a result snippet.
SNIPPET_CONTEXT = 50
class SearchEngine:
    """Keyword search over process documents with relevance ranking.

    Each search matches the query as a case-insensitive substring against a
    process's name, description, and content, scores every matching process,
    and returns the best matches first.

    Process fields that are missing or ``None`` are treated as empty text, and
    a blank query never matches anything.
    """

    def __init__(self, service: ProcessService) -> None:
        """Initialize the search engine.

        Args:
            service: ProcessService instance for accessing processes.
        """
        self._service = service
        _logger.debug("search_engine_initialized")

    def search(self, query: str) -> list[dict[str, Any]]:
        """Search processes by keyword.

        Searches across process name, description, and content (FR-010).
        Results are ordered by relevance (FR-011) and limited to
        ``MAX_RESULTS`` entries (FR-012a).

        Args:
            query: Search keyword or phrase (case-insensitive). An empty or
                whitespace-only query yields no results.

        Returns:
            List of dicts with the keys ``name``, ``description``,
            ``relevance_score`` and ``snippet``, sorted by
            ``relevance_score`` descending. Processes with equal scores keep
            the order in which they were discovered.
        """
        _logger.info("search_request", query=query)

        # An empty string is a substring of every string, so without this
        # guard a blank query would return every process with a perfect
        # score. Bail out before any files are discovered or parsed.
        if not query or not query.strip():
            _logger.info("search_completed", query=query, result_count=0)
            return []

        query_lower = query.lower()
        results = []
        for process in self._get_all_processes():
            score = self._calculate_relevance(process, query_lower)
            if score > 0:
                results.append(
                    {
                        "name": process["name"],
                        "description": self._text(process, "description"),
                        "relevance_score": score,
                        "snippet": self._extract_snippet(process, query_lower),
                    }
                )

        # list.sort is stable even with reverse=True, so ties keep their
        # discovery order.
        results.sort(key=lambda x: x["relevance_score"], reverse=True)
        results = results[:MAX_RESULTS]

        _logger.info("search_completed", query=query, result_count=len(results))
        return results

    @staticmethod
    def _text(process: dict[str, Any], key: str) -> str:
        """Return ``process[key]``, or ``""`` when it is missing or ``None``.

        Args:
            process: Process dictionary.
            key: Field name to read (e.g. ``"description"``).

        Returns:
            The stored value, or an empty string for a missing/``None`` field.
        """
        return process.get(key) or ""

    def _get_all_processes(self) -> list[dict[str, Any]]:
        """Discover and parse every process file in the service's directory.

        Files for which ``parse_process_file`` returns ``None`` are skipped.

        Returns:
            List of parsed process dictionaries.
        """
        # NOTE(review): imports are function-local, presumably to avoid an
        # import cycle with the processes package -- confirm before hoisting.
        from sso_mcp_server.processes.discovery import discover_processes
        from sso_mcp_server.processes.parser import parse_process_file

        processes = []
        # Reads the service's private ``_process_dir`` attribute directly.
        for file_path in discover_processes(self._service._process_dir):
            parsed = parse_process_file(file_path)
            if parsed is not None:
                processes.append(parsed)
        return processes

    def _calculate_relevance(self, process: dict[str, Any], query: str) -> float:
        """Calculate the normalised relevance score of a process.

        Raw points (per design.md):
        - Name contains the query: ``NAME_MATCH_POINTS``, awarded once.
        - Description contains the query: ``DESCRIPTION_MATCH_POINTS``,
          awarded once.
        - Content: ``CONTENT_MATCH_POINTS`` per non-overlapping occurrence,
          counting at most 5 occurrences.

        The raw total is divided by the maximum achievable total and rounded
        to two decimals.

        Args:
            process: Process dictionary with name, description, content.
                Missing or ``None`` fields count as empty text.
            query: Lowercase search query.

        Returns:
            Relevance score in the range 0.0 to 1.0; 0.0 when nothing matches
            or when the query is blank.
        """
        # "" is contained in every string and "".count() semantics would max
        # out the content score, so a blank query must never match.
        if not query or not query.strip():
            return 0.0

        max_possible = NAME_MATCH_POINTS + DESCRIPTION_MATCH_POINTS + CONTENT_MATCH_POINTS * 5

        name = self._text(process, "name").lower()
        description = self._text(process, "description").lower()
        content = self._text(process, "content").lower()

        score = 0
        # Name match (highest weight)
        if query in name:
            score += NAME_MATCH_POINTS
        # Description match
        if query in description:
            score += DESCRIPTION_MATCH_POINTS
        # Content matches: capped so long documents cannot outweigh a
        # name or description hit.
        score += min(content.count(query), 5) * CONTENT_MATCH_POINTS

        # Normalize to 0.0 - 1.0
        return round(score / max_possible, 2) if score > 0 else 0.0

    def _extract_snippet(self, process: dict[str, Any], query: str) -> str:
        """Extract a snippet showing the match context.

        Preference order:
        1. The first occurrence in the content, with up to
           ``SNIPPET_CONTEXT`` characters on each side and ``"..."`` added on
           any side that was truncated.
        2. The first 100 characters of the description (plus ``"..."`` if it
           is longer) when the query occurs there.
        3. A ``"Match in: <name>"`` placeholder otherwise.

        Args:
            process: Process dictionary with name, description, content.
                Missing or ``None`` text fields count as empty text.
            query: Lowercase search query.

        Returns:
            Snippet string with match context.
        """
        content = self._text(process, "content")

        # Find the first match position
        pos = content.lower().find(query)
        if pos == -1:
            description = self._text(process, "description")
            if query in description.lower():
                return description[:100] + ("..." if len(description) > 100 else "")
            # Neither content nor description matched, so the hit was in the name.
            return f"Match in: {process.get('name', 'Unknown')}"

        # Extract snippet with context
        start = max(0, pos - SNIPPET_CONTEXT)
        end = min(len(content), pos + len(query) + SNIPPET_CONTEXT)
        snippet = content[start:end]

        # Add ellipsis if truncated
        if start > 0:
            snippet = "..." + snippet
        if end < len(content):
            snippet = snippet + "..."
        return snippet