# search_code
Find code by meaning using natural language queries. Search for code semantically similar to your description without needing exact variable or function names.
## Instructions
Search for code semantically similar to the query.
Finds code by meaning, not just text matching. Use this when you want to find code related to a concept without knowing exact variable/function names.
Examples:

- "authentication logic" — finds login, session handling, token validation
- "error handling for API calls" — finds try/except blocks, error responses
- "database connection setup" — finds connection pooling, ORM initialization
Automatically indexes the project if not already indexed, and re-indexes any files that have changed since the last search.
Args:

- query: Natural language description of what you're looking for.
- project_path: Absolute path to the project root directory.
- limit: Maximum number of results to return (default 10).
Returns: List of matching code chunks with file path, line numbers, content, and score.
## Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| query | Yes | Natural language description of what you're looking for. | |
| project_path | Yes | Absolute path to the project root directory. | |
| limit | No | Maximum number of results to return. | 10 |
## Implementation Reference
- src/semantic_code_mcp/server.py:30-110 (handler): Main search_code tool handler registered with @mcp.tool() decorator. Accepts query, project_path, and limit parameters, delegates to SearchService, and returns SearchResponse with formatted results and debug info including timings and stats.

@mcp.tool()
@profile_async("search_code")
async def search_code(
    query: str,
    project_path: str,
    ctx: Context[ServerSession, None],
    limit: int = 10,
) -> SearchResponse | ErrorResponse:
    """Search for code semantically similar to the query.

    Finds code by meaning, not just text matching. Use this when you want to
    find code related to a concept without knowing exact variable/function names.

    Examples:
    - "authentication logic" - finds login, session handling, token validation
    - "error handling for API calls" - finds try/except blocks, error responses
    - "database connection setup" - finds connection pooling, ORM initialization

    Automatically indexes the project if not already indexed, and re-indexes
    any files that have changed since the last search.

    Args:
        query: Natural language description of what you're looking for.
        project_path: Absolute path to the project root directory.
        limit: Maximum number of results to return (default 10).

    Returns:
        List of matching code chunks with file path, line numbers, content, and score.
    """
    total_start = time.perf_counter()
    await ctx.info(f"Searching for: {query}")

    path = Path(project_path)
    if not path.exists():
        await ctx.warning(f"Project path does not exist: {project_path}")
        return ErrorResponse(error=f"Path does not exist: {project_path}")

    # Delegate to search service
    container = get_container()
    search_service = container.create_search_service(path)
    outcome = await search_service.search(query, path, limit, on_progress=ctx.report_progress)

    total_ms = round((time.perf_counter() - total_start) * 1000, 1)
    await ctx.info(f"Found {len(outcome.results)} results in {total_ms}ms")

    # Transform domain -> response
    indexing_ms = round(outcome.index_result.duration_seconds * 1000, 1)
    timings = SearchTimings(
        embedding_ms=outcome.embedding_ms,
        search_ms=outcome.search_ms,
        total_ms=total_ms,
        # Omit indexing time entirely when no indexing work was done.
        indexing_ms=indexing_ms if indexing_ms > 0 else None,
    )
    index_result_summary = IndexResultSummary(
        files_indexed=outcome.index_result.files_indexed,
        chunks_indexed=outcome.index_result.chunks_indexed,
    )
    # Any files indexed during this call means the index had gone stale.
    was_stale = outcome.index_result.files_indexed > 0

    # Get live index status for debug info
    index_service = container.create_index_service(path)
    status = index_service.get_status(path)

    debug = SearchDebugInfo(
        timings=timings,
        stats=SearchStats.from_outcome(outcome),
        index_status=IndexStatusSummary(
            files_count=status.files_count,
            chunks_count=status.chunks_count,
            was_stale=was_stale,
        ),
        index_result=index_result_summary,
    )

    return SearchResponse(
        results=[FormattedSearchResult.from_domain(r) for r in outcome.results],
        debug=debug,
    )
- SearchResponse schema defining the tool's output structure with results list and debug information.

class SearchResponse(BaseModel):
    """Complete search response with results and debug info."""

    # Ranked, formatted matches returned to the caller.
    results: list[FormattedSearchResult]
    # Timings, stats, and index status attached for diagnostics.
    debug: SearchDebugInfo
- FormattedSearchResult schema with from_domain() classmethod that transforms SearchResult domain model to API response format, including content truncation logic.

class FormattedSearchResult(BaseModel):
    """A search result formatted for response."""

    file_path: str
    line_start: int
    line_end: int
    name: str
    chunk_type: str
    content: str
    score: float
    truncated: bool = False

    @classmethod
    def from_domain(
        cls,
        result: SearchResult,
        max_lines: int = 50,
    ) -> FormattedSearchResult:
        """Build a response-ready result from a SearchResult domain object.

        Content longer than max_lines is cut off with a "... (truncated)"
        marker and flagged via the truncated field; the score is rounded
        to 3 decimal places.
        """
        content = result.content
        lines = content.split("\n")
        truncated = len(lines) > max_lines
        if truncated:
            content = "\n".join(lines[:max_lines]) + "\n... (truncated)"
        return cls(
            file_path=result.file_path,
            line_start=result.line_start,
            line_end=result.line_end,
            name=result.name,
            chunk_type=result.chunk_type,
            content=content,
            score=round(result.score, 3),
            truncated=truncated,
        )
- SearchService.search() method implementing the core search logic: checks index status, triggers auto-indexing for stale files, performs hybrid vector/full-text search, and returns SearchOutcome.

async def search(
    self,
    query: str,
    project_path: Path,
    limit: int = 10,
    min_score: float = 0.3,
    vector_weight: float = 0.5,
    on_progress: ProgressCallback | None = None,
) -> SearchOutcome:
    """Search with optional auto-indexing and progress callbacks.

    Args:
        query: Natural language search query.
        project_path: Root directory of the project.
        limit: Maximum number of results.
        min_score: Minimum similarity score threshold.
        vector_weight: Weight for vector vs full-text search.
        on_progress: Optional callback matching ctx.report_progress(progress, total, message).

    Returns:
        SearchOutcome with results and timing info.
    """

    async def _progress(percent: float, message: str) -> None:
        # Forward progress to the caller-supplied callback, if any.
        if on_progress is not None:
            await on_progress(percent, 100, message)

    await _progress(5, "Checking index...")

    # Check if indexing needed
    status = self.index_service.get_status(project_path)
    index_result: IndexResult | None = None
    needs_index = not status.is_indexed
    needs_reindex = status.is_indexed and bool(status.stale_files)

    if needs_index or needs_reindex:
        reason = (
            "Index not found, indexing..."
            if needs_index
            else f"Re-indexing {len(status.stale_files)} stale files..."
        )
        await _progress(10, reason)
        index_result = await self.index_service.index(project_path, force=False)

    await _progress(85, "Searching...")
    # Run the blocking search off the event loop.
    outcome = await asyncio.to_thread(self._do_search, query, limit, min_score, vector_weight)
    # Attach the indexing result only when indexing actually ran this call.
    if index_result is not None:
        outcome.index_result = index_result
    await _progress(100, f"Found {len(outcome.results)} results")
    return outcome
- SearchService._do_search() internal implementation that embeds the query, executes hybrid search, applies min_score filtering, recency boosting, and groups results by file.

def _do_search(
    self,
    query: str,
    limit: int,
    min_score: float,
    vector_weight: float,
) -> SearchOutcome:
    """Internal search implementation.

    Embeds the query, runs hybrid vector/full-text search, filters by
    min_score, applies a recency boost, and groups the top results by file.
    """
    total_start = time.perf_counter()

    # Embed query
    t0 = time.perf_counter()
    query_embedding = self.embedder.embed_text(query)
    embedding_ms = (time.perf_counter() - t0) * 1000

    # Hybrid search — over-fetch so post-filtering still leaves enough candidates.
    t0 = time.perf_counter()
    raw_results = self.store.search_hybrid(
        query_embedding,
        query,
        limit * SEARCH_OVERFETCH_FACTOR,
        vector_weight,
    )
    search_ms = (time.perf_counter() - t0) * 1000

    # Filter low-confidence results
    filtered = [r for r in raw_results if r.score >= min_score]

    # Apply recency boost
    boosted = self._apply_recency_boost(filtered)

    # Sort by boosted score, take limit
    boosted.sort(key=lambda x: x[1], reverse=True)
    top_results = [r for r, _ in boosted[:limit]]

    # Group by file
    grouped = self._group_by_file(top_results)

    total_ms = (time.perf_counter() - total_start) * 1000

    return SearchOutcome(
        results=grouped,
        raw_count=len(raw_results),
        filtered_count=len(raw_results) - len(filtered),
        embedding_ms=round(embedding_ms, 1),
        search_ms=round(search_ms, 1),
        total_ms=round(total_ms, 1),
    )