Skip to main content
Glama
test_indexer.py (5.81 kB)
"""Tests for BM25 indexer.""" import pytest from mcp_server_builder.utils.indexer import ( Doc, IndexSearch, _enhanced_tokenize, _generate_ngrams, ) class TestTokenization: """Tests for tokenization functions.""" def test_basic_tokenization(self) -> None: """Test basic word tokenization.""" tokens = _enhanced_tokenize("hello world") assert "hello" in tokens or "hello" in list(tokens) assert "world" in tokens or "world" in list(tokens) def test_stopword_removal(self) -> None: """Test that stop words are removed.""" tokens = _enhanced_tokenize("the quick brown fox") assert "the" not in tokens # "quick", "brown", "fox" should be stemmed but present def test_camelcase_splitting(self) -> None: """Test CamelCase is split into parts.""" # FastMCP is preserved as "fastmcp" since it's in PRESERVE_TERMS tokens = _enhanced_tokenize("FastMCP") assert "fastmcp" in tokens # Test with a non-preserved CamelCase term tokens2 = _enhanced_tokenize("AgentCore") # Should be split and stemmed: "agent" and "core" assert any("agent" in t for t in tokens2) or any( "agentcor" in t for t in tokens2 ) def test_preserve_terms(self) -> None: """Test domain-specific terms are preserved unstemmed.""" tokens = _enhanced_tokenize("mcp json rpc stdio") assert "mcp" in tokens assert "json" in tokens assert "rpc" in tokens assert "stdio" in tokens def test_stemming(self) -> None: """Test that stemming is applied.""" tokens = _enhanced_tokenize("running tools transports") # "running" -> "run", "tools" -> "tool", "transports" -> "transport" assert "run" in tokens or "running" in tokens assert "tool" in tokens or "tools" in tokens class TestNgrams: """Tests for n-gram generation.""" def test_bigrams(self) -> None: """Test bigram generation.""" tokens = ["hello", "world", "test"] bigrams = _generate_ngrams(tokens, 2) assert "hello_world" in bigrams assert "world_test" in bigrams assert len(bigrams) == 2 def test_trigrams(self) -> None: """Test trigram generation.""" tokens = ["a", "b", "c", "d"] trigrams 
= _generate_ngrams(tokens, 3) assert "a_b_c" in trigrams assert "b_c_d" in trigrams assert len(trigrams) == 2 def test_empty_ngrams(self) -> None: """Test n-grams with insufficient tokens.""" tokens = ["single"] bigrams = _generate_ngrams(tokens, 2) assert bigrams == [] class TestIndexSearch: """Tests for IndexSearch class.""" @pytest.fixture def sample_index(self) -> IndexSearch: """Create a sample index with test documents.""" index = IndexSearch() index.add( Doc( uri="https://example.com/tools", display_title="MCP Tools Guide", content="Tools are the primary way for MCP servers to expose functionality.", index_title="MCP Tools Guide", ) ) index.add( Doc( uri="https://example.com/resources", display_title="Resources Documentation", content="Resources provide read-only data access to clients.", index_title="Resources Documentation", ) ) index.add( Doc( uri="https://example.com/prompts", display_title="Prompts Reference", content="Prompts are user-controlled templates for common operations.", index_title="Prompts Reference", ) ) return index def test_add_document(self) -> None: """Test adding a document to the index.""" index = IndexSearch() doc = Doc( uri="https://test.com", display_title="Test", content="Test content", index_title="Test", ) result = index.add(doc) assert result is index # Returns self for chaining assert len(index.docs) == 1 assert index.docs[0] == doc def test_search_returns_results(self, sample_index: IndexSearch) -> None: """Test that search returns relevant results.""" results = sample_index.search("tools functionality") assert len(results) > 0 # First result should be the tools document score, doc = results[0] assert "tools" in doc.uri.lower() or "tool" in doc.display_title.lower() def test_search_empty_query(self, sample_index: IndexSearch) -> None: """Test search with empty query.""" results = sample_index.search("") # Should return empty or handle gracefully assert isinstance(results, list) def test_search_no_matches(self, sample_index: 
IndexSearch) -> None: """Test search with no matching terms.""" results = sample_index.search("xyznonexistent123") assert results == [] def test_search_respects_k(self, sample_index: IndexSearch) -> None: """Test that search respects the k parameter.""" results = sample_index.search("documentation", k=1) assert len(results) <= 1 def test_empty_index_search(self) -> None: """Test search on empty index.""" index = IndexSearch() results = index.search("anything") assert results == [] def test_doc_lengths_tracked(self, sample_index: IndexSearch) -> None: """Test that document lengths are tracked for BM25.""" assert len(sample_index.doc_lengths) == 3 assert sample_index.avg_doc_length > 0

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/praveenc/mcp-server-builder'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.