Skip to main content
Glama
test_text_processor.py (5.86 kB)
"""Tests for text processor utilities.""" from mcp_server_builder.utils.doc_fetcher import Page from mcp_server_builder.utils.text_processor import ( format_display_title, index_title_variants, make_snippet, normalize, normalize_for_comparison, title_from_url, ) class TestNormalize: """Tests for normalize function.""" def test_collapses_whitespace(self) -> None: """Test that multiple spaces are collapsed.""" assert normalize("hello world") == "hello world" def test_strips_edges(self) -> None: """Test that leading/trailing whitespace is removed.""" assert normalize(" hello ") == "hello" def test_handles_newlines(self) -> None: """Test that newlines are converted to spaces.""" assert normalize("hello\nworld") == "hello world" def test_empty_string(self) -> None: """Test empty string handling.""" assert normalize("") == "" class TestTitleFromUrl: """Tests for title_from_url function.""" def test_extracts_slug(self) -> None: """Test extracting title from URL slug.""" url = "https://example.com/docs/getting-started" assert title_from_url(url) == "Getting Started" def test_handles_underscores(self) -> None: """Test converting underscores to spaces.""" url = "https://example.com/api_reference" assert title_from_url(url) == "Api Reference" def test_removes_index_files(self) -> None: """Test that index.* files are handled.""" url = "https://example.com/docs/index.html" result = title_from_url(url) assert "index" not in result.lower() def test_fallback_to_documentation(self) -> None: """Test fallback when no meaningful path.""" url = "https://example.com/" result = title_from_url(url) assert result # Should have some value class TestFormatDisplayTitle: """Tests for format_display_title function.""" def test_prefers_curated_title(self) -> None: """Test that curated titles are preferred.""" url = "https://example.com/page" url_titles = {url: "Curated Title"} result = format_display_title(url, "Extracted Title", url_titles) assert result == "Curated Title" def 
test_uses_extracted_when_no_curated(self) -> None: """Test using extracted title when no curated available.""" result = format_display_title( "https://example.com/page", "Extracted Title", {}, ) assert result == "Extracted Title" def test_uses_url_when_no_extracted(self) -> None: """Test using URL-derived title when no extracted.""" result = format_display_title( "https://example.com/my-page", None, {}, ) assert "My Page" in result or "my" in result.lower() def test_rejects_generic_titles(self) -> None: """Test that generic titles like 'index' are rejected.""" result = format_display_title( "https://example.com/docs", "index", {}, ) assert result.lower() != "index" class TestIndexTitleVariants: """Tests for index_title_variants function.""" def test_includes_original(self) -> None: """Test that original title is included.""" result = index_title_variants("Hello World", "https://example.com/hello") assert "Hello World" in result def test_numeric_to_word(self) -> None: """Test Agent2Agent -> Agent to Agent conversion.""" result = index_title_variants("Agent2Agent", "https://example.com/a2a") assert "to" in result.lower() class TestNormalizeForComparison: """Tests for normalize_for_comparison function.""" def test_lowercase(self) -> None: """Test that string is lowercased.""" result = normalize_for_comparison("Hello WORLD") assert result == "hello world" def test_removes_punctuation(self) -> None: """Test that punctuation is removed.""" result = normalize_for_comparison("Hello, World!") assert "," not in result assert "!" 
not in result class TestMakeSnippet: """Tests for make_snippet function.""" def test_returns_title_for_none_page(self) -> None: """Test fallback to title when page is None.""" result = make_snippet(None, "Fallback Title") assert result == "Fallback Title" def test_returns_title_for_empty_content(self) -> None: """Test fallback when content is empty.""" page = Page(url="https://test.com", title="Test", content="") result = make_snippet(page, "Fallback Title") assert result == "Fallback Title" def test_extracts_first_paragraph(self) -> None: """Test extracting first meaningful paragraph.""" content = """# Heading This is the first paragraph that should be extracted. ## Another heading More content here. """ page = Page(url="https://test.com", title="Test", content=content) result = make_snippet(page, "Test") assert "first paragraph" in result def test_truncates_long_snippets(self) -> None: """Test that long snippets are truncated.""" long_content = "A" * 500 page = Page(url="https://test.com", title="Test", content=long_content) result = make_snippet(page, "Test", max_chars=100) assert len(result) <= 100 assert result.endswith("…") def test_skips_headings(self) -> None: """Test that markdown headings are skipped.""" content = """# Main Heading ## Sub Heading This is the actual content. """ page = Page(url="https://test.com", title="Test", content=content) result = make_snippet(page, "Test") assert not result.startswith("#")

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/praveenc/mcp-server-builder'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.