PortOne MCP Server

Official

Overview Schema Related Servers Score Discussions

mcp-server
tests

test_bm25.py•6.8 KiB

from portone_mcp_server.loader.markdown import Frontmatter, MarkdownDocument
from portone_mcp_server.tools.utils.bm25 import calculate_bm25_scores, get_top_documents


class TestBM25Scoring:
    """Test cases for BM25 scoring functions."""

    def test_calculate_bm25_scores_empty_documents(self):
        """Test BM25 calculation with empty document collection."""
        result = calculate_bm25_scores("test", {})

        assert result == []

    def test_calculate_bm25_scores_no_matches(self):
        """Test BM25 calculation when query doesn't match any documents."""
        documents = {
            "doc1.md": MarkdownDocument(
                path="doc1.md",
                content="This is a document about Python programming.",
            ),
            "doc2.md": MarkdownDocument(
                path="doc2.md",
                content="Another document about web development.",
            ),
        }

        result = calculate_bm25_scores("nonexistent", documents)

        assert result == []

    def test_calculate_bm25_scores_single_match(self):
        """Test BM25 calculation with a single matching document."""
        documents = {
            "doc1.md": MarkdownDocument(
                path="doc1.md",
                content="Python is a great programming language. Python is versatile.",
            ),
            "doc2.md": MarkdownDocument(
                path="doc2.md",
                content="JavaScript is used for web development.",
            ),
        }

        result = calculate_bm25_scores("Python", documents)

        assert len(result) == 1
        assert result[0][0] == "doc1.md"
        # When term appears in 1 out of 2 docs, IDF can be 0
        assert result[0][1] >= 0

    def test_calculate_bm25_scores_multiple_matches(self):
        """Test BM25 calculation with multiple matching documents."""
        documents = {
            "doc1.md": MarkdownDocument(
                path="doc1.md",
                content="Python programming is fun. Python is easy to learn.",
            ),
            "doc2.md": MarkdownDocument(
                path="doc2.md",
                content="I love Python programming.",
            ),
            "doc3.md": MarkdownDocument(
                path="doc3.md",
                content="Java is another programming language.",
            ),
        }

        result = calculate_bm25_scores("Python", documents)

        assert len(result) == 2
        # Both documents contain "Python"
        doc_paths = [entry[0] for entry in result]
        assert "doc1.md" in doc_paths
        assert "doc2.md" in doc_paths
        # BM25 scores can be negative when IDF is negative
        # (term appears in most documents), so the sign is not asserted here.

    def test_calculate_bm25_scores_with_frontmatter(self):
        """Test BM25 calculation including frontmatter content."""
        frontmatter = Frontmatter(
            title="Python Tutorial",
            description="Learn Python basics",
            raw_string="---\ntitle: Python Tutorial\ndescription: Learn Python basics\n---",
        )
        # "Python" occurs only in doc1's frontmatter, never in any body text.
        documents = {
            "doc1.md": MarkdownDocument(
                path="doc1.md",
                content="This is a programming tutorial.",
                frontmatter=frontmatter,
            ),
            "doc2.md": MarkdownDocument(
                path="doc2.md",
                content="JavaScript tutorial content.",
            ),
        }

        result = calculate_bm25_scores("Python", documents)

        assert len(result) == 1
        assert result[0][0] == "doc1.md"

    def test_calculate_bm25_scores_regex_pattern(self):
        """Test BM25 calculation with regex patterns."""
        documents = {
            "doc1.md": MarkdownDocument(
                path="doc1.md",
                content="Error code: ERR-001. Another error: ERR-002.",
            ),
            "doc2.md": MarkdownDocument(
                path="doc2.md",
                content="Success message: OK-200.",
            ),
        }

        result = calculate_bm25_scores(r"ERR-\d+", documents)

        assert len(result) == 1
        assert result[0][0] == "doc1.md"

    def test_calculate_bm25_scores_invalid_regex(self):
        """Test BM25 calculation with invalid regex pattern."""
        documents = {
            "doc1.md": MarkdownDocument(
                path="doc1.md",
                content="Some content",
            ),
        }

        # The pattern has an unterminated character class and group.
        result = calculate_bm25_scores("[invalid(regex", documents)

        assert result == []

    def test_calculate_bm25_scores_case_insensitive(self):
        """Test that BM25 scoring is case insensitive."""
        documents = {
            "doc1.md": MarkdownDocument(
                path="doc1.md",
                content="PYTHON is great. python is fun. Python rocks!",
            ),
        }

        result = calculate_bm25_scores("python", documents)

        assert len(result) == 1
        assert result[0][0] == "doc1.md"

    def test_get_top_documents_limits_results(self):
        """Test that get_top_documents returns only top_k results."""
        # Document i contains the word "test" i + 1 times.
        documents = {
            f"doc{i}.md": MarkdownDocument(
                path=f"doc{i}.md",
                content=f"Document {i} contains the word test." + " test" * i,
            )
            for i in range(20)
        }

        result = get_top_documents("test", documents, top_k=5)

        assert len(result) == 5
        # Verify ordering: scores must be returned in non-increasing order.
        scores = [entry[1] for entry in result]
        assert scores == sorted(scores, reverse=True)

    def test_get_top_documents_fewer_than_k(self):
        """Test get_top_documents when there are fewer matching docs than k."""
        documents = {
            "doc1.md": MarkdownDocument(
                path="doc1.md",
                content="Python programming",
            ),
            "doc2.md": MarkdownDocument(
                path="doc2.md",
                content="Java programming",
            ),
        }

        result = get_top_documents("Python", documents, top_k=10)

        assert len(result) == 1

    def test_bm25_parameters_affect_scoring(self):
        """Test that k1 and b parameters affect the scoring."""
        documents = {
            "short.md": MarkdownDocument(
                path="short.md",
                content="Python Python",
            ),
            "long.md": MarkdownDocument(
                path="long.md",
                content="Python is a programming language. " * 20 + " Python",
            ),
        }

        # Test with different b values (length normalization)
        result_b0 = calculate_bm25_scores("Python", documents, k1=1.2, b=0.0)
        result_b1 = calculate_bm25_scores("Python", documents, k1=1.2, b=1.0)

        # Index scores by document path: the two result lists may rank the
        # documents differently, so comparing result[0] of each could end up
        # comparing two different documents.
        scores_b0 = {entry[0]: entry[1] for entry in result_b0}
        scores_b1 = {entry[0]: entry[1] for entry in result_b1}
        assert set(scores_b0) == {"short.md", "long.md"}
        assert set(scores_b1) == {"short.md", "long.md"}

        # With b=0, document length shouldn't matter
        # With b=1, longer documents should be penalized more
        # Neither document has the average length, so changing b is expected
        # to move each document's own score.
        for path in ("short.md", "long.md"):
            assert scores_b0[path] != scores_b1[path]

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/portone-io/mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_bm25.py•6.8 KiB