MCP Server Builder

test_text_processor.py•5.72 KiB

"""Tests for text processor utilities.""" from mcp_server_builder.utils.doc_fetcher import Page from mcp_server_builder.utils.text_processor import ( format_display_title, index_title_variants, make_snippet, normalize, normalize_for_comparison, title_from_url, ) class TestNormalize: """Tests for normalize function.""" def test_collapses_whitespace(self) -> None: """Test that multiple spaces are collapsed.""" assert normalize("hello world") == "hello world" def test_strips_edges(self) -> None: """Test that leading/trailing whitespace is removed.""" assert normalize(" hello ") == "hello" def test_handles_newlines(self) -> None: """Test that newlines are converted to spaces.""" assert normalize("hello\nworld") == "hello world" def test_empty_string(self) -> None: """Test empty string handling.""" assert normalize("") == "" class TestTitleFromUrl: """Tests for title_from_url function.""" def test_extracts_slug(self) -> None: """Test extracting title from URL slug.""" url = "https://example.com/docs/getting-started" assert title_from_url(url) == "Getting Started" def test_handles_underscores(self) -> None: """Test converting underscores to spaces.""" url = "https://example.com/api_reference" assert title_from_url(url) == "Api Reference" def test_removes_index_files(self) -> None: """Test that index.* files are handled.""" url = "https://example.com/docs/index.html" result = title_from_url(url) assert "index" not in result.lower() def test_fallback_to_documentation(self) -> None: """Test fallback when no meaningful path.""" url = "https://example.com/" result = title_from_url(url) assert result # Should have some value class TestFormatDisplayTitle: """Tests for format_display_title function.""" def test_prefers_curated_title(self) -> None: """Test that curated titles are preferred.""" url = "https://example.com/page" url_titles = {url: "Curated Title"} result = format_display_title(url, "Extracted Title", url_titles) assert result == "Curated Title" def 
test_uses_extracted_when_no_curated(self) -> None: """Test using extracted title when no curated available.""" result = format_display_title( "https://example.com/page", "Extracted Title", {}, ) assert result == "Extracted Title" def test_uses_url_when_no_extracted(self) -> None: """Test using URL-derived title when no extracted.""" result = format_display_title( "https://example.com/my-page", None, {}, ) assert "My Page" in result or "my" in result.lower() def test_rejects_generic_titles(self) -> None: """Test that generic titles like 'index' are rejected.""" result = format_display_title( "https://example.com/docs", "index", {}, ) assert result.lower() != "index" class TestIndexTitleVariants: """Tests for index_title_variants function.""" def test_includes_original(self) -> None: """Test that original title is included.""" result = index_title_variants("Hello World", "https://example.com/hello") assert "Hello World" in result def test_numeric_to_word(self) -> None: """Test Agent2Agent -> Agent to Agent conversion.""" result = index_title_variants("Agent2Agent", "https://example.com/a2a") assert "to" in result.lower() class TestNormalizeForComparison: """Tests for normalize_for_comparison function.""" def test_lowercase(self) -> None: """Test that string is lowercased.""" result = normalize_for_comparison("Hello WORLD") assert result == "hello world" def test_removes_punctuation(self) -> None: """Test that punctuation is removed.""" result = normalize_for_comparison("Hello, World!") assert "," not in result assert "!" 
not in result class TestMakeSnippet: """Tests for make_snippet function.""" def test_returns_title_for_none_page(self) -> None: """Test fallback to title when page is None.""" result = make_snippet(None, "Fallback Title") assert result == "Fallback Title" def test_returns_title_for_empty_content(self) -> None: """Test fallback when content is empty.""" page = Page(url="https://test.com", title="Test", content="") result = make_snippet(page, "Fallback Title") assert result == "Fallback Title" def test_extracts_first_paragraph(self) -> None: """Test extracting first meaningful paragraph.""" content = """# Heading This is the first paragraph that should be extracted. ## Another heading More content here. """ page = Page(url="https://test.com", title="Test", content=content) result = make_snippet(page, "Test") assert "first paragraph" in result def test_truncates_long_snippets(self) -> None: """Test that long snippets are truncated.""" long_content = "A" * 500 page = Page(url="https://test.com", title="Test", content=long_content) result = make_snippet(page, "Test", max_chars=100) assert len(result) <= 100 assert result.endswith("…") def test_skips_headings(self) -> None: """Test that markdown headings are skipped.""" content = """# Main Heading ## Sub Heading This is the actual content. """ page = Page(url="https://test.com", title="Test", content=content) result = make_snippet(page, "Test") assert not result.startswith("#")

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/praveenc/mcp-server-builder'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_text_processor.py•5.72 KiB