test_semantic_search.py•16.1 kB
"""
Integration tests for semantic code search (Scenario 3).
Tests verify (from quickstart.md):
- Semantic code search functionality (FR-011 to FR-015)
- Search relevance and similarity scoring
- Search filters (file_type, directory)
- Context lines (10 before/after)
- Performance target: p95 <500ms latency
- Graceful handling of no-match queries
TDD Compliance: These tests MUST FAIL initially since services are not implemented yet.
"""
from __future__ import annotations
import time
from decimal import Decimal
from pathlib import Path
from typing import TYPE_CHECKING
import pytest
from sqlalchemy.ext.asyncio import AsyncSession
if TYPE_CHECKING:
from src.models.code_chunk import CodeChunk
from src.services.searcher import SearchResult
@pytest.fixture
async def indexed_repository_with_diverse_code(
tmp_path: Path,
db_session: AsyncSession,
) -> Path:
"""
Create and index a repository with diverse code for search testing.
Includes:
- Math functions (add, multiply, calculator)
- String utilities (format, log)
- File I/O operations
- Different languages (Python, JavaScript if supported)
"""
pytest.skip("Indexer service not implemented yet (T031)")
# Future implementation:
# repo_path = tmp_path / "search-test-repo"
# repo_path.mkdir()
#
# src_dir = repo_path / "src"
# src_dir.mkdir()
#
# # Math module
# (src_dir / "calculator.py").write_text('''def add(a: int, b: int) -> int:
# """Add two numbers together and return the sum."""
# return a + b
#
# def multiply(a: int, b: int) -> int:
# """Multiply two numbers together and return the product."""
# return a * b
#
# class Calculator:
# """A calculator class for performing arithmetic operations."""
#
# def calculate(self, operation: str, a: int, b: int) -> int:
# """Perform the specified arithmetic operation."""
# if operation == "add":
# return add(a, b)
# elif operation == "multiply":
# return multiply(a, b)
# else:
# raise ValueError(f"Unknown operation: {operation}")
# ''')
#
# # String utilities
# (src_dir / "string_utils.py").write_text('''def format_result(value: int) -> str:
# """Format a numeric result as a string."""
# return f"Result: {value}"
#
# def log_message(message: str, level: str = "INFO") -> None:
# """Log a message to console with specified level."""
# print(f"[{level}] {message}")
# ''')
#
# # File I/O
# (src_dir / "file_ops.py").write_text('''from pathlib import Path
#
# def read_config(file_path: Path) -> dict:
# """Read configuration from a JSON file."""
# import json
# with open(file_path) as f:
# return json.load(f)
#
# def write_output(data: str, output_path: Path) -> None:
# """Write data to output file."""
# output_path.write_text(data)
# ''')
#
# # Index the repository
# from src.services.indexer import index_repository
# await index_repository(path=repo_path, name="Search Test Repo", force_reindex=False)
#
# return repo_path
@pytest.fixture
async def db_session() -> AsyncSession:
"""Create async database session for tests."""
pytest.skip("Database session fixture not implemented yet (requires T019-T027)")
@pytest.mark.integration
@pytest.mark.asyncio
async def test_basic_semantic_search_not_implemented(
indexed_repository_with_diverse_code: Path,
db_session: AsyncSession,
) -> None:
"""
Test basic semantic search functionality - NOT YET IMPLEMENTED.
Expected workflow:
1. Search for "How do I add two numbers?"
2. Verify add() function chunk returned with high similarity (>0.8)
3. Verify result includes context lines
4. Verify latency <500ms
This test MUST FAIL until T032 (searcher service) is implemented.
"""
pytest.skip("Semantic search service not implemented yet (T032)")
# Future implementation:
# from src.services.searcher import search_code
#
# start_time = time.perf_counter()
# results = await search_code(
# query="How do I add two numbers together?",
# limit=5,
# )
# latency_ms = (time.perf_counter() - start_time) * 1000
#
# # Verify results returned
# assert len(results) > 0
# assert results[0].similarity_score > 0.8 # High relevance for add function
#
# # Verify add function is top result
# top_result = results[0]
# assert "def add" in top_result.content
# assert "Add two numbers" in top_result.content
#
# # Verify context lines provided
# assert top_result.context_before is not None or top_result.context_after is not None
#
# # Verify performance
# assert latency_ms < 500, f"Search took {latency_ms:.0f}ms, exceeds 500ms target"
@pytest.mark.integration
@pytest.mark.asyncio
async def test_search_with_file_type_filter(
indexed_repository_with_diverse_code: Path,
db_session: AsyncSession,
) -> None:
"""
Test semantic search with file_type filter.
Expected behavior:
- Search for "calculator" with file_type="py"
- All results should be from .py files
- Results should include calculator.py chunks
This test MUST FAIL until T032 (searcher with filters) is implemented.
"""
pytest.skip("Search with filters not implemented yet (T032)")
# Future implementation:
# from src.services.searcher import search_code
#
# results = await search_code(
# query="calculator class",
# file_type="py",
# limit=10,
# )
#
# # Verify results exist
# assert len(results) > 0
#
# # Verify all results are from .py files
# for result in results:
# assert result.file_path.endswith(".py")
#
# # Verify Calculator class is in results
# contents = [r.content for r in results]
# assert any("class Calculator" in content for content in contents)
@pytest.mark.integration
@pytest.mark.asyncio
async def test_search_with_directory_filter(
indexed_repository_with_diverse_code: Path,
db_session: AsyncSession,
) -> None:
"""
Test semantic search with directory filter.
Expected behavior:
- Search with directory="src"
- All results should be from src/ directory
- Other directories excluded from results
This test MUST FAIL until T032 (searcher with filters) is implemented.
"""
pytest.skip("Search with filters not implemented yet (T032)")
# Future implementation:
# from src.services.searcher import search_code
#
# results = await search_code(
# query="function",
# directory="src",
# limit=10,
# )
#
# # Verify results exist
# assert len(results) > 0
#
# # Verify all results are from src/ directory
# for result in results:
# assert "src/" in result.file_path or result.file_path.startswith("src/")
@pytest.mark.integration
@pytest.mark.asyncio
async def test_search_with_combined_filters(
indexed_repository_with_diverse_code: Path,
db_session: AsyncSession,
) -> None:
"""
Test semantic search with multiple filters combined.
Expected behavior:
- Search with file_type="py" AND directory="src"
- Results must satisfy both filters
- Verify filter combination works correctly
This test MUST FAIL until T032 (searcher with filters) is implemented.
"""
pytest.skip("Search with combined filters not implemented yet (T032)")
# Future implementation:
# from src.services.searcher import search_code
#
# results = await search_code(
# query="add numbers",
# file_type="py",
# directory="src",
# limit=5,
# )
#
# # Verify results exist
# assert len(results) > 0
#
# # Verify all results satisfy both filters
# for result in results:
# assert result.file_path.endswith(".py")
# assert "src/" in result.file_path
@pytest.mark.integration
@pytest.mark.asyncio
async def test_search_similarity_scoring(
indexed_repository_with_diverse_code: Path,
db_session: AsyncSession,
) -> None:
"""
Test that search results are ranked by similarity score.
Expected behavior:
- Results ordered by similarity score (highest first)
- Similarity scores between 0.0 and 1.0
- More relevant results have higher scores
This test MUST FAIL until T032 (searcher) is implemented.
"""
pytest.skip("Similarity scoring not implemented yet (T032)")
# Future implementation:
# from src.services.searcher import search_code
#
# results = await search_code(
# query="Add two numbers together",
# limit=10,
# )
#
# # Verify results exist
# assert len(results) > 0
#
# # Verify similarity scores are valid
# for result in results:
# assert 0.0 <= result.similarity_score <= 1.0
#
# # Verify results are ordered by similarity (descending)
# scores = [r.similarity_score for r in results]
# assert scores == sorted(scores, reverse=True)
#
# # Verify most relevant result has high score
# assert results[0].similarity_score > 0.7
@pytest.mark.integration
@pytest.mark.asyncio
async def test_search_context_lines(
indexed_repository_with_diverse_code: Path,
db_session: AsyncSession,
) -> None:
"""
Test that search results include context lines (10 before/after).
Expected behavior:
- Each result includes context_before (up to 10 lines before chunk)
- Each result includes context_after (up to 10 lines after chunk)
- Context helps understand code in context
This test MUST FAIL until T032 (searcher) is implemented.
"""
pytest.skip("Context lines not implemented yet (T032)")
# Future implementation:
# from src.services.searcher import search_code
#
# results = await search_code(
# query="add function",
# limit=5,
# )
#
# # Verify results exist
# assert len(results) > 0
#
# # Verify context lines provided
# top_result = results[0]
#
# # Context may be empty at file boundaries, but should be provided
# assert hasattr(top_result, "context_before")
# assert hasattr(top_result, "context_after")
#
# # If there are lines before/after, verify they're included
# if top_result.start_line > 1:
# assert top_result.context_before is not None
# assert len(top_result.context_before.split("\n")) <= 10
@pytest.mark.integration
@pytest.mark.asyncio
async def test_search_with_no_results(
indexed_repository_with_diverse_code: Path,
db_session: AsyncSession,
) -> None:
"""
Test graceful handling when search returns no results.
Expected behavior:
- Search for query with no semantic matches
- Returns empty array (not error)
- Response structure is still valid
This test MUST FAIL until T032 (searcher) is implemented.
"""
pytest.skip("Search service not implemented yet (T032)")
# Future implementation:
# from src.services.searcher import search_code
#
# results = await search_code(
# query="blockchain consensus algorithm quantum computing",
# limit=10,
# )
#
# # Verify empty results (not error)
# assert results == []
# assert isinstance(results, list)
@pytest.mark.integration
@pytest.mark.asyncio
async def test_search_limit_parameter(
indexed_repository_with_diverse_code: Path,
db_session: AsyncSession,
) -> None:
"""
Test that search respects the limit parameter.
Expected behavior:
- Search with limit=3
- Returns at most 3 results
- Results are top-ranked by similarity
This test MUST FAIL until T032 (searcher) is implemented.
"""
pytest.skip("Search service not implemented yet (T032)")
# Future implementation:
# from src.services.searcher import search_code
#
# # Search with limit=3
# results_limited = await search_code(
# query="function",
# limit=3,
# )
#
# # Verify limit respected
# assert len(results_limited) <= 3
#
# # Search with higher limit to verify top results
# results_all = await search_code(
# query="function",
# limit=100,
# )
#
# # Verify limited results are top-ranked
# if len(results_all) >= 3:
# for i in range(3):
# assert results_limited[i].chunk_id == results_all[i].chunk_id
@pytest.mark.integration
@pytest.mark.asyncio
async def test_search_performance_latency(
indexed_repository_with_diverse_code: Path,
db_session: AsyncSession,
) -> None:
"""
Test that search meets <500ms p95 latency target.
Expected behavior:
- Run multiple searches
- Measure latency for each
- Verify p95 <500ms
This test MUST FAIL until T032 (searcher optimization) is implemented.
"""
pytest.skip("Search performance optimization not implemented yet (T032)")
# Future implementation:
# from src.services.searcher import search_code
#
# queries = [
# "add two numbers",
# "calculator class",
# "format result",
# "log message",
# "read configuration",
# ]
#
# latencies: list[float] = []
#
# for query in queries:
# start_time = time.perf_counter()
# await search_code(query=query, limit=10)
# latency_ms = (time.perf_counter() - start_time) * 1000
# latencies.append(latency_ms)
#
# # Calculate p95 latency
# latencies.sort()
# p95_index = int(len(latencies) * 0.95)
# p95_latency = latencies[p95_index]
#
# # Verify performance target
# assert p95_latency < 500, f"P95 latency {p95_latency:.0f}ms exceeds 500ms target"
@pytest.mark.integration
@pytest.mark.asyncio
async def test_search_embedding_generation_for_query(
indexed_repository_with_diverse_code: Path,
db_session: AsyncSession,
) -> None:
"""
Test that query embeddings are generated correctly.
Expected behavior:
- Query text is converted to embedding
- Embedding is 768-dimensional (nomic-embed-text)
- Embedding is used for cosine similarity search
This test MUST FAIL until T030 (embedder) and T032 (searcher) are implemented.
"""
pytest.skip("Query embedding not implemented yet (T030, T032)")
# Future implementation:
# from src.services.embedder import generate_embedding
#
# query = "How do I add two numbers?"
# embedding = await generate_embedding(query)
#
# # Verify embedding generated
# assert embedding is not None
# assert len(embedding) == 768 # nomic-embed-text dimensions
# assert all(isinstance(x, float) for x in embedding)
@pytest.mark.integration
@pytest.mark.asyncio
async def test_search_returns_file_metadata(
indexed_repository_with_diverse_code: Path,
db_session: AsyncSession,
) -> None:
"""
Test that search results include file metadata.
Expected behavior:
- Results include file_path, language
- Results include chunk location (start_line, end_line)
- Metadata helps users locate code
This test MUST FAIL until T032 (searcher) is implemented.
"""
pytest.skip("Search service not implemented yet (T032)")
# Future implementation:
# from src.services.searcher import search_code
#
# results = await search_code(
# query="add function",
# limit=5,
# )
#
# # Verify results exist
# assert len(results) > 0
#
# # Verify metadata included
# for result in results:
# assert result.file_path is not None
# assert result.file_path.endswith(".py")
# assert result.start_line >= 1
# assert result.end_line >= result.start_line
# assert result.chunk_type in ["function", "class", "block"]