Documentation MCP Server

test_search.py•19.2 KiB

"""Unit tests for search functionality.""" from datetime import datetime, timezone from unittest.mock import patch import pytest from docs_mcp.models.document import Document from docs_mcp.models.navigation import Category from docs_mcp.services.search import ( SearchError, _extract_excerpt, _highlight_matches, search_by_metadata, search_content, ) class TestSearchContent: """Test search_content function.""" @pytest.fixture def sample_documents(self): """Create sample documents for testing.""" return [ Document( uri="docs://guides/getting-started", title="Getting Started Guide", content="This is a comprehensive guide to getting started with the system. " "It covers installation, configuration, and basic usage.", category="guides", tags=["tutorial", "beginner"], file_path="/docs/guides/getting-started.md", relative_path="docs/guides/getting-started.md", size_bytes=100, last_modified=datetime.now(timezone.utc), ), Document( uri="docs://api/authentication", title="Authentication API", content="The authentication API provides endpoints for user login, " "logout, and token management. Use JWT tokens for secure access.", category="api", tags=["security", "api"], file_path="/docs/api/authentication.md", relative_path="docs/api/authentication.md", size_bytes=100, last_modified=datetime.now(timezone.utc), ), Document( uri="docs://guides/advanced", title="Advanced Topics", content="Advanced configuration options and optimization techniques. 
" "Learn how to fine-tune performance and customize behavior.", category="guides", tags=["advanced", "tutorial"], file_path="/docs/guides/advanced.md", relative_path="docs/guides/advanced.md", size_bytes=100, last_modified=datetime.now(timezone.utc), ), ] @pytest.fixture def sample_categories(self): """Create sample categories for testing.""" return { "docs://guides": Category( name="guides", uri="docs://guides", label="Guides", depth=0, source_category="guides", child_documents=[ "docs://guides/getting-started", "docs://guides/advanced", ], child_categories=[], document_count=2, ), "docs://api": Category( name="api", uri="docs://api", label="API", depth=0, source_category="api", child_documents=["docs://api/authentication"], child_categories=[], document_count=1, ), } def test_search_content_basic_query(self, sample_documents, sample_categories): """Test basic content search.""" results = search_content("authentication", sample_documents, sample_categories) assert len(results) > 0 assert any("authentication" in r.title.lower() for r in results) def test_search_content_in_title(self, sample_documents, sample_categories): """Test search matching in document title.""" results = search_content("Getting Started", sample_documents, sample_categories) assert len(results) > 0 first_result = results[0] assert "Getting Started" in first_result.title assert first_result.match_type == "title" assert first_result.relevance_score > 0 def test_search_content_in_body(self, sample_documents, sample_categories): """Test search matching in document content.""" results = search_content("JWT", sample_documents, sample_categories) assert len(results) > 0 assert any("JWT" in r.excerpt for r in results) def test_search_content_case_insensitive(self, sample_documents, sample_categories): """Test search is case-insensitive.""" results_lower = search_content("authentication", sample_documents, sample_categories) results_upper = search_content("AUTHENTICATION", sample_documents, 
sample_categories) results_mixed = search_content("Authentication", sample_documents, sample_categories) assert len(results_lower) == len(results_upper) == len(results_mixed) def test_search_content_with_limit(self, sample_documents, sample_categories): """Test search respects result limit.""" results = search_content("guide", sample_documents, sample_categories, limit=1) assert len(results) == 1 def test_search_content_with_category_filter(self, sample_documents, sample_categories): """Test search with category filter.""" results = search_content( "guide", sample_documents, sample_categories, category_filter="guides" ) assert len(results) > 0 assert all("guides" in r.document_uri for r in results) def test_search_content_empty_query(self, sample_documents, sample_categories): """Test search with empty query returns empty results.""" results = search_content("", sample_documents, sample_categories) assert len(results) == 0 def test_search_content_no_matches(self, sample_documents, sample_categories): """Test search with no matching documents.""" results = search_content("xyznonexistent", sample_documents, sample_categories) assert len(results) == 0 def test_search_content_relevance_scoring(self, sample_documents, sample_categories): """Test search results are sorted by relevance.""" results = search_content("guide", sample_documents, sample_categories) assert len(results) > 0 # Results should be sorted by relevance (highest first) for i in range(len(results) - 1): assert results[i].relevance_score >= results[i + 1].relevance_score def test_search_content_metadata_match(self, sample_documents, sample_categories): """Test search matching in metadata (tags, category).""" results = search_content("tutorial", sample_documents, sample_categories) assert len(results) > 0 # Should match documents with 'tutorial' tag assert any( "tutorial" in doc.tags for doc in sample_documents if doc.uri in [r.document_uri for r in results] ) def 
test_search_content_special_characters_escaped(self, sample_documents, sample_categories): """Test search handles regex special characters.""" # This should not cause regex errors results = search_content("API.", sample_documents, sample_categories) # Should work without raising exceptions assert isinstance(results, list) def test_search_content_invalid_regex_raises_error(self, sample_documents, sample_categories): """Test search with invalid regex pattern.""" with patch("docs_mcp.services.search.sanitize_query") as mock_sanitize: mock_sanitize.return_value = "[" # Invalid regex with pytest.raises(SearchError, match="Invalid search pattern"): search_content("[", sample_documents, sample_categories) def test_search_content_breadcrumbs_included(self, sample_documents, sample_categories): """Test search results include breadcrumbs.""" results = search_content("authentication", sample_documents, sample_categories) assert len(results) > 0 for result in results: assert isinstance(result.breadcrumbs, list) def test_search_content_highlighted_excerpt(self, sample_documents, sample_categories): """Test search results include highlighted excerpts.""" results = search_content("authentication", sample_documents, sample_categories) assert len(results) > 0 for result in results: if result.relevance_score > 0: assert result.excerpt != "" def test_search_content_multiple_matches(self, sample_documents, sample_categories): """Test search with multiple matching documents.""" results = search_content("guide", sample_documents, sample_categories) # Should find multiple documents with "guide" in title or content assert len(results) >= 2 def test_search_content_caching(self, sample_documents, sample_categories): """Test search results are cached.""" # First search results1 = search_content("authentication", sample_documents, sample_categories) # Second search (should be cached) with patch("docs_mcp.services.search.get_cache") as mock_cache: mock_cache.return_value.get.return_value = 
results1 results2 = search_content("authentication", sample_documents, sample_categories) # Should return cached results assert results1 == results2 class TestSearchByMetadata: """Test search_by_metadata function.""" @pytest.fixture def sample_documents(self): """Create sample documents for testing.""" return [ Document( uri="docs://guides/getting-started", title="Getting Started", content="Introduction to the system", category="guides", tags=["tutorial", "beginner"], file_path="/docs/guides/getting-started.md", relative_path="docs/guides/getting-started.md", size_bytes=100, last_modified=datetime.now(timezone.utc), ), Document( uri="docs://api/authentication", title="Authentication", content="Authentication details", category="api", tags=["security", "api"], file_path="/docs/api/authentication.md", relative_path="docs/api/authentication.md", size_bytes=100, last_modified=datetime.now(timezone.utc), ), Document( uri="docs://guides/advanced", title="Advanced Guide", content="Advanced topics", category="guides", tags=["tutorial", "advanced"], file_path="/docs/guides/advanced.md", relative_path="docs/guides/advanced.md", size_bytes=100, last_modified=datetime.now(timezone.utc), ), ] def test_search_by_tags(self, sample_documents): """Test searching by tags.""" results = search_by_metadata(tags=["tutorial"], documents=sample_documents) assert len(results) > 0 assert all( "tutorial" in doc.tags for doc in sample_documents if doc.uri in [r.document_uri for r in results] ) def test_search_by_multiple_tags(self, sample_documents): """Test searching by multiple tags (OR logic).""" results = search_by_metadata(tags=["security", "beginner"], documents=sample_documents) assert len(results) >= 2 # Should find documents with either "security" OR "beginner" def test_search_by_category(self, sample_documents): """Test searching by category.""" results = search_by_metadata(category="guides", documents=sample_documents) assert len(results) == 2 assert all(r.category == "guides" or 
"guides" in r.document_uri for r in results) def test_search_by_tags_and_category(self, sample_documents): """Test searching by both tags and category (AND logic).""" results = search_by_metadata( tags=["tutorial"], category="guides", documents=sample_documents ) assert len(results) > 0 # Should find documents that match BOTH criteria for result in results: matching_doc = next(d for d in sample_documents if d.uri == result.document_uri) assert "tutorial" in matching_doc.tags assert matching_doc.category == "guides" def test_search_by_metadata_with_limit(self, sample_documents): """Test metadata search respects limit.""" results = search_by_metadata(tags=["tutorial"], documents=sample_documents, limit=1) assert len(results) == 1 def test_search_by_metadata_no_matches(self, sample_documents): """Test metadata search with no matches.""" results = search_by_metadata(tags=["nonexistent"], documents=sample_documents) assert len(results) == 0 def test_search_by_metadata_empty_filters(self, sample_documents): """Test metadata search with no filters returns all documents.""" results = search_by_metadata(documents=sample_documents, limit=10) assert len(results) == len(sample_documents) def test_search_by_metadata_none_documents(self): """Test metadata search with None documents.""" results = search_by_metadata(tags=["test"], documents=None) assert len(results) == 0 def test_search_by_metadata_includes_breadcrumbs(self, sample_documents): """Test metadata search results include breadcrumbs.""" results = search_by_metadata(tags=["tutorial"], documents=sample_documents) assert len(results) > 0 for result in results: assert isinstance(result.breadcrumbs, list) def test_search_by_metadata_includes_excerpt(self, sample_documents): """Test metadata search results include excerpt.""" results = search_by_metadata(tags=["tutorial"], documents=sample_documents) assert len(results) > 0 for result in results: assert result.excerpt != "" def test_search_by_metadata_match_type(self, 
sample_documents): """Test metadata search results have correct match type.""" results = search_by_metadata(tags=["tutorial"], documents=sample_documents) assert len(results) > 0 for result in results: assert result.match_type == "metadata" def test_search_by_metadata_relevance_score(self, sample_documents): """Test metadata search results have relevance score.""" results = search_by_metadata(tags=["tutorial"], documents=sample_documents) assert len(results) > 0 for result in results: assert result.relevance_score == 1.0 class TestExtractExcerpt: """Test _extract_excerpt helper function.""" def test_extract_excerpt_with_match(self): """Test extracting excerpt with query match.""" content = "This is a long document with some interesting content about Python programming." query = "interesting" excerpt = _extract_excerpt(content, query, context_chars=20) assert "interesting" in excerpt assert "..." in excerpt # Should have ellipsis for truncation def test_extract_excerpt_no_match(self): """Test extracting excerpt when query doesn't match.""" content = "This is a document without the search term." query = "nonexistent" excerpt = _extract_excerpt(content, query, context_chars=20) assert len(excerpt) > 0 assert excerpt.endswith("...") def test_extract_excerpt_at_start(self): """Test extracting excerpt when match is at start.""" content = "Python is a great language for programming and data science." 
query = "Python" excerpt = _extract_excerpt(content, query, context_chars=20) assert excerpt.startswith("Python") assert excerpt.endswith("...") def test_extract_excerpt_at_end(self): """Test extracting excerpt when match is at end.""" content = "This document is about programming in Python" query = "Python" excerpt = _extract_excerpt(content, query, context_chars=20) assert "Python" in excerpt assert excerpt.startswith("...") def test_extract_excerpt_short_content(self): """Test extracting excerpt from short content.""" content = "Short text" query = "text" excerpt = _extract_excerpt(content, query, context_chars=20) assert "text" in excerpt def test_extract_excerpt_case_insensitive(self): """Test extract excerpt is case-insensitive.""" content = "This document contains PYTHON programming examples." query = "python" excerpt = _extract_excerpt(content, query, context_chars=20) assert "PYTHON" in excerpt class TestHighlightMatches: """Test _highlight_matches helper function.""" def test_highlight_matches_basic(self): """Test basic match highlighting.""" text = "This is a test document" query = "test" highlighted = _highlight_matches(text, query) assert "**test**" in highlighted def test_highlight_matches_case_insensitive(self): """Test highlighting is case-insensitive.""" text = "This is a TEST document" query = "test" highlighted = _highlight_matches(text, query) assert "**TEST**" in highlighted def test_highlight_matches_multiple_occurrences(self): """Test highlighting multiple occurrences.""" text = "test test test" query = "test" highlighted = _highlight_matches(text, query) assert highlighted.count("**test**") == 3 def test_highlight_matches_no_match(self): """Test highlighting with no matches.""" text = "This is a document" query = "nonexistent" highlighted = _highlight_matches(text, query) assert highlighted == text def test_highlight_matches_custom_marker(self): """Test highlighting with custom marker.""" text = "This is a test" query = "test" highlighted = 
_highlight_matches(text, query, highlight="__") assert "__test__" in highlighted def test_highlight_matches_special_characters(self): """Test highlighting handles special characters.""" text = "Use API." query = "API." # Should not raise exception highlighted = _highlight_matches(text, query) assert isinstance(highlighted, str) class TestSearchError: """Test SearchError exception.""" def test_search_error_creation(self): """Test creating SearchError.""" error = SearchError("Test error message") assert str(error) == "Test error message" assert isinstance(error, Exception) def test_search_error_can_be_raised(self): """Test SearchError can be raised and caught.""" with pytest.raises(SearchError): raise SearchError("Test error") def test_search_error_with_cause(self): """Test SearchError with underlying cause.""" try: try: raise ValueError("Original error") except ValueError as e: raise SearchError("Search failed") from e except SearchError as error: assert str(error) == "Search failed" assert isinstance(error.__cause__, ValueError)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/esola-thomas/your-docs-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_search.py•19.2 KiB