Skip to main content
Glama
test_intelligent_retrieval.py (24.5 kB)
""" Unit tests for intelligent retrieval service """ import pytest import pytest_asyncio from unittest.mock import AsyncMock, MagicMock, patch from datetime import datetime, timedelta from typing import List, Dict, Any from src.intelligent_retrieval import ( SearchContext, RetrievalResult, QueryIntentClassifier, ContextualRetriever, IntelligentRetrievalService ) from src.models import Project, Todo, CalendarEvent class TestSearchContext: """Test SearchContext dataclass""" def test_search_context_creation(self): """Test basic search context creation""" context = SearchContext( user_id="test_user", tenant_id="test_tenant", query="test query" ) assert context.user_id == "test_user" assert context.tenant_id == "test_tenant" assert context.query == "test query" assert context.intent is None assert context.max_results == 10 assert context.similarity_threshold == 0.7 def test_search_context_with_filters(self): """Test search context with filters""" context = SearchContext( user_id="user", tenant_id="tenant", query="urgent tasks", intent="todo_planning", time_scope="today", priority_filter="high", content_types=["todos", "projects"], max_results=5, similarity_threshold=0.8 ) assert context.intent == "todo_planning" assert context.time_scope == "today" assert context.priority_filter == "high" assert context.content_types == ["todos", "projects"] assert context.max_results == 5 assert context.similarity_threshold == 0.8 class TestRetrievalResult: """Test RetrievalResult dataclass""" def test_retrieval_result_creation(self): """Test retrieval result creation""" result = RetrievalResult( content_type="project", item_id="project_123", title="Test Project", description="A test project", relevance_score=0.85, context_match={"intent_match": True}, metadata={"priority": "high"} ) assert result.content_type == "project" assert result.item_id == "project_123" assert result.title == "Test Project" assert result.relevance_score == 0.85 assert result.context_match["intent_match"] is 
True assert result.metadata["priority"] == "high" def test_retrieval_result_to_dict(self): """Test retrieval result to dict conversion""" result = RetrievalResult( content_type="todo", item_id="todo_456", title="Test Todo", description=None, relevance_score=0.75, context_match={"time_match": False}, metadata={"completed": False} ) result_dict = result.to_dict() assert result_dict["content_type"] == "todo" assert result_dict["item_id"] == "todo_456" assert result_dict["title"] == "Test Todo" assert result_dict["description"] is None assert result_dict["relevance_score"] == 0.75 assert result_dict["context_match"]["time_match"] is False assert result_dict["metadata"]["completed"] is False class TestQueryIntentClassifier: """Test QueryIntentClassifier class""" def test_classify_intent_project_search(self): """Test intent classification for project search""" queries = [ "show me my projects", "what am I working on", "current project status", "building new app" ] for query in queries: intent = QueryIntentClassifier.classify_intent(query) assert intent in ["project_search", "status_update", "general_search"] def test_classify_intent_todo_planning(self): """Test intent classification for todo planning""" queries = [ "what tasks do I need to do", "todo list for today", "upcoming deadlines", "plan my work" ] for query in queries: intent = QueryIntentClassifier.classify_intent(query) assert intent in ["todo_planning", "status_update", "general_search"] def test_classify_intent_status_update(self): """Test intent classification for status updates""" queries = [ "what is my current status", "show me my progress", "where am I on projects", "current work summary" ] for query in queries: intent = QueryIntentClassifier.classify_intent(query) # Status update queries might be classified as status_update or project_search assert intent in ["status_update", "project_search", "general_search"] def test_classify_intent_calendar_query(self): """Test intent classification for calendar 
queries""" queries = [ "upcoming meetings", "schedule for today", "calendar events", "next appointment" ] for query in queries: intent = QueryIntentClassifier.classify_intent(query) assert intent in ["calendar_query", "general_search"] def test_classify_intent_document_search(self): """Test intent classification for document search""" queries = [ "find my documents", "search files", "notes about project", "saved documentation" ] for query in queries: intent = QueryIntentClassifier.classify_intent(query) assert intent in ["document_search", "general_search"] def test_classify_intent_unknown(self): """Test intent classification for unknown queries""" queries = [ "random unrelated query", "xyz abc def", "" ] for query in queries: intent = QueryIntentClassifier.classify_intent(query) assert intent == "general_search" def test_extract_time_scope(self): """Test time scope extraction""" test_cases = [ ("tasks for today", "today"), ("this week's projects", "this_week"), ("monthly review", "this_month"), ("recent updates", "recent"), ("overdue tasks", "overdue"), ("random query", None) ] for query, expected_scope in test_cases: scope = QueryIntentClassifier.extract_time_scope(query) assert scope == expected_scope def test_extract_priority_filter(self): """Test priority filter extraction""" test_cases = [ ("urgent tasks", "high"), ("high priority items", "high"), ("important projects", "high"), ("medium priority", "medium"), ("normal tasks", "medium"), ("low priority items", "low"), ("later tasks", "low"), ("regular query", None) ] for query, expected_priority in test_cases: priority = QueryIntentClassifier.extract_priority_filter(query) assert priority == expected_priority class TestContextualRetriever: """Test ContextualRetriever class""" @pytest_asyncio.fixture async def mock_database(self): """Mock database interface""" db = AsyncMock() # Mock projects projects = [ Project( id="p1", name="Website Development", description="Building company website", status="active", 
priority="high", tags=["web", "development"], created_date=datetime.now() - timedelta(days=5), updated_date=datetime.now() - timedelta(hours=2) ), Project( id="p2", name="Mobile App", description="iOS mobile application", status="active", priority="medium", tags=["mobile", "ios"], created_date=datetime.now() - timedelta(days=10), updated_date=datetime.now() - timedelta(days=1) ) ] # Mock todos todos = [ Todo( id="t1", title="Design homepage", description="Create mockups for homepage", completed=False, priority="high", project_id="p1", due_date=datetime.now() + timedelta(days=2), created_date=datetime.now() - timedelta(days=3), updated_date=datetime.now() - timedelta(hours=1) ), Todo( id="t2", title="Setup development env", description="Configure development environment", completed=True, priority="medium", project_id="p1", created_date=datetime.now() - timedelta(days=4), updated_date=datetime.now() - timedelta(days=3) ) ] # Mock events events = [ CalendarEvent( id="e1", title="Client Meeting", description="Review progress with client", start_time=datetime.now() + timedelta(hours=2), end_time=datetime.now() + timedelta(hours=3), location="Conference Room", attendees=["client@example.com"], created_date=datetime.now() - timedelta(days=1), updated_date=datetime.now() - timedelta(hours=1) ) ] db.get_projects.return_value = projects db.get_todos.return_value = todos db.get_calendar_events.return_value = events return db @pytest_asyncio.fixture async def mock_embedding_service(self): """Mock embedding service""" service = AsyncMock() service.generate_embedding.return_value = [0.1] * 384 service.cosine_similarity.return_value = 0.8 return service @pytest_asyncio.fixture async def contextual_retriever(self, mock_database, mock_embedding_service): """Create contextual retriever with mocks""" return ContextualRetriever(mock_database, mock_embedding_service) @pytest.mark.asyncio async def test_retrieve_with_intent_classification(self, contextual_retriever): """Test retrieval 
with automatic intent classification""" context = SearchContext( user_id="user1", tenant_id="tenant1", query="what projects am I working on" ) results = await contextual_retriever.retrieve(context) assert isinstance(results, list) # Should have classified intent automatically assert context.intent is not None @pytest.mark.asyncio async def test_retrieve_projects(self, contextual_retriever): """Test project retrieval""" context = SearchContext( user_id="user1", tenant_id="tenant1", query="web development", content_types=["projects"], max_results=5 ) results = await contextual_retriever.retrieve(context) assert isinstance(results, list) assert len(results) <= context.max_results # Check that results are RetrievalResult objects for result in results: assert isinstance(result, RetrievalResult) assert result.content_type == "project" @pytest.mark.asyncio async def test_retrieve_todos(self, contextual_retriever): """Test todo retrieval""" context = SearchContext( user_id="user1", tenant_id="tenant1", query="design tasks", content_types=["todos"], max_results=5 ) results = await contextual_retriever.retrieve(context) assert isinstance(results, list) for result in results: assert isinstance(result, RetrievalResult) assert result.content_type == "todo" @pytest.mark.asyncio async def test_retrieve_events(self, contextual_retriever): """Test event retrieval""" context = SearchContext( user_id="user1", tenant_id="tenant1", query="meetings", content_types=["events"], max_results=5 ) results = await contextual_retriever.retrieve(context) assert isinstance(results, list) for result in results: assert isinstance(result, RetrievalResult) assert result.content_type == "event" @pytest.mark.asyncio async def test_retrieve_with_priority_filter(self, contextual_retriever): """Test retrieval with priority filter""" context = SearchContext( user_id="user1", tenant_id="tenant1", query="high priority work", priority_filter="high", max_results=10 ) results = await 
contextual_retriever.retrieve(context) # Should have applied priority filter assert isinstance(results, list) @pytest.mark.asyncio async def test_retrieve_with_time_scope(self, contextual_retriever): """Test retrieval with time scope""" context = SearchContext( user_id="user1", tenant_id="tenant1", query="today's work", time_scope="today", max_results=10 ) results = await contextual_retriever.retrieve(context) # Should have applied time scope filter assert isinstance(results, list) def test_determine_content_types(self, contextual_retriever): """Test content type determination based on intent""" test_cases = [ ("project_search", ["projects", "todos"]), ("todo_planning", ["todos", "projects"]), ("calendar_query", ["events"]), ("document_search", ["documents"]), ("status_update", ["projects", "todos", "events"]), ("unknown_intent", ["projects", "todos"]) ] for intent, expected_types in test_cases: types = contextual_retriever._determine_content_types(intent) assert types == expected_types def test_matches_context_filters(self, contextual_retriever): """Test context filter matching""" project = Project( id="p1", name="Test Project", description="Test", status="active", priority="high", created_date=datetime.now() - timedelta(days=1), updated_date=datetime.now() - timedelta(hours=1) ) # Test priority filter match context = SearchContext( user_id="user1", tenant_id="tenant1", query="test", priority_filter="high" ) assert contextual_retriever._matches_context_filters(project, context) is True # Test priority filter mismatch context.priority_filter = "low" assert contextual_retriever._matches_context_filters(project, context) is False # Test no filter context.priority_filter = None assert contextual_retriever._matches_context_filters(project, context) is True def test_check_time_relevance(self, contextual_retriever): """Test time relevance checking""" now = datetime.now() # Test today today_item = now.replace(hour=10, minute=0) assert 
contextual_retriever._check_time_relevance(today_item, "today") is True yesterday_item = now - timedelta(days=1) assert contextual_retriever._check_time_relevance(yesterday_item, "today") is False # Test this week this_week_item = now - timedelta(days=3) assert contextual_retriever._check_time_relevance(this_week_item, "this_week") is True last_week_item = now - timedelta(days=10) assert contextual_retriever._check_time_relevance(last_week_item, "this_week") is False # Test no scope assert contextual_retriever._check_time_relevance(last_week_item, None) is True def test_get_time_range(self, contextual_retriever): """Test time range calculation""" now = datetime.now() # Test today start, end = contextual_retriever._get_time_range("today") assert start is not None assert end is not None assert start.date() == now.date() assert end.date() == (now + timedelta(days=1)).date() # Test this week start, end = contextual_retriever._get_time_range("this_week") assert start is not None assert end is not None assert (end - start).days == 7 # Test no scope start, end = contextual_retriever._get_time_range(None) assert start is None assert end is None def test_calculate_text_similarity(self, contextual_retriever): """Test text similarity calculation""" query = "web development project" # Exact match text1 = "web development project" similarity1 = contextual_retriever._calculate_text_similarity(query, text1) assert similarity1 == 1.0 # Partial match text2 = "mobile development project" similarity2 = contextual_retriever._calculate_text_similarity(query, text2) assert 0 < similarity2 < 1.0 # No match text3 = "completely different content" similarity3 = contextual_retriever._calculate_text_similarity(query, text3) assert similarity3 == 0.0 # Empty query similarity4 = contextual_retriever._calculate_text_similarity("", text1) assert similarity4 == 0.0 class TestIntelligentRetrievalService: """Test IntelligentRetrievalService class""" @pytest_asyncio.fixture async def 
mock_database(self): """Mock database interface""" return AsyncMock() @pytest_asyncio.fixture async def mock_embedding_service(self): """Mock embedding service""" return AsyncMock() @pytest_asyncio.fixture async def retrieval_service(self, mock_database, mock_embedding_service): """Create retrieval service with mocks""" return IntelligentRetrievalService(mock_database, mock_embedding_service) @pytest.mark.asyncio async def test_search_basic(self, retrieval_service): """Test basic search functionality""" # Mock retriever mock_results = [ RetrievalResult( content_type="project", item_id="p1", title="Test Project", description="A test project", relevance_score=0.85, context_match={"intent_match": True}, metadata={"priority": "high"} ) ] retrieval_service.retriever.retrieve = AsyncMock(return_value=mock_results) result = await retrieval_service.search( user_id="user1", tenant_id="tenant1", query="test project" ) assert result["query"] == "test project" assert "context" in result assert "results" in result assert result["total_results"] == 1 assert "retrieval_metadata" in result # Check result structure results = result["results"] assert len(results) == 1 assert results[0]["content_type"] == "project" assert results[0]["item_id"] == "p1" @pytest.mark.asyncio async def test_search_with_filters(self, retrieval_service): """Test search with various filters""" mock_results = [] retrieval_service.retriever.retrieve = AsyncMock(return_value=mock_results) result = await retrieval_service.search( user_id="user1", tenant_id="tenant1", query="urgent tasks", intent="todo_planning", time_scope="today", priority_filter="high", content_types=["todos"], max_results=5, similarity_threshold=0.8 ) # Verify search context was created correctly call_args = retrieval_service.retriever.retrieve.call_args[0][0] assert call_args.query == "urgent tasks" assert call_args.intent == "todo_planning" assert call_args.time_scope == "today" assert call_args.priority_filter == "high" assert 
call_args.content_types == ["todos"] assert call_args.max_results == 5 assert call_args.similarity_threshold == 0.8 @pytest.mark.asyncio async def test_search_context_structure(self, retrieval_service): """Test search result context structure""" mock_results = [] retrieval_service.retriever.retrieve = AsyncMock(return_value=mock_results) result = await retrieval_service.search( user_id="user1", tenant_id="tenant1", query="test query" ) # Check context structure context = result["context"] assert "intent" in context assert "time_scope" in context assert "priority_filter" in context assert "content_types" in context # Check metadata structure metadata = result["retrieval_metadata"] assert "similarity_threshold" in metadata assert "max_results" in metadata assert "search_timestamp" in metadata @pytest.mark.asyncio async def test_search_empty_results(self, retrieval_service): """Test search with no results""" retrieval_service.retriever.retrieve = AsyncMock(return_value=[]) result = await retrieval_service.search( user_id="user1", tenant_id="tenant1", query="nonexistent content" ) assert result["results"] == [] assert result["total_results"] == 0 @pytest.mark.asyncio async def test_search_multiple_content_types(self, retrieval_service): """Test search across multiple content types""" mock_results = [ RetrievalResult( content_type="project", item_id="p1", title="Project 1", description="Description 1", relevance_score=0.9, context_match={}, metadata={} ), RetrievalResult( content_type="todo", item_id="t1", title="Todo 1", description="Description 1", relevance_score=0.8, context_match={}, metadata={} ), RetrievalResult( content_type="event", item_id="e1", title="Event 1", description="Description 1", relevance_score=0.7, context_match={}, metadata={} ) ] retrieval_service.retriever.retrieve = AsyncMock(return_value=mock_results) result = await retrieval_service.search( user_id="user1", tenant_id="tenant1", query="mixed content", content_types=["projects", "todos", 
"events"] ) assert result["total_results"] == 3 # Check that different content types are included content_types = [r["content_type"] for r in result["results"]] assert "project" in content_types assert "todo" in content_types assert "event" in content_types

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/swapnilsurdi/mcp-pa'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.