Skip to main content
Glama

MCP Browser Use

by janspoerer
test_cleaners_refactoring.py5.96 kB
"""Test suite to verify cleaners.py refactoring maintains backward compatibility.""" import sys from pathlib import Path # Fix Windows encoding issues if sys.platform == "win32": import io sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') # Add src to path src_path = Path(__file__).parent.parent / "src" sys.path.insert(0, str(src_path)) from mcp_browser_use.cleaners import ( basic_prune, _is_button_like, _remove_comments, _remove_scripts_and_styles, _remove_noise_containers, _clean_cdn_links, _prune_attributes, _collapse_wrappers, _normalize_whitespace, ) def test_basic_prune_backward_compatibility(): """Test that basic_prune still works with the same interface.""" html = """ <html> <head> <script>alert('test');</script> <style>.test { color: red; }</style> <!-- This is a comment --> </head> <body> <div class="container"> <h1 class="title">Hello World</h1> <p class="text">This is a test.</p> <div id="gtm-tracker">Tracker</div> <input type="hidden" name="csrf" value="123"> </div> </body> </html> """ # Test with default parameters result_html, counts = basic_prune(html, level=3) print("✓ basic_prune executed successfully") print(f" - Script tags removed: {counts['script']}") print(f" - Style tags removed: {counts['style']}") print(f" - Comments removed: {counts['comments_removed']}") print(f" - Noise containers removed: {counts['noise']}") print(f" - Hidden elements removed: {counts['hidden_removed']}") assert isinstance(result_html, str), "Result should be a string" assert isinstance(counts, dict), "Counts should be a dict" assert counts["script"] > 0, "Should have removed scripts" assert counts["style"] > 0, "Should have removed styles" assert counts["comments_removed"] > 0, "Should have removed comments" def test_helper_functions_exist(): """Test that all helper functions are properly defined.""" print("\n✓ All helper functions are importable:") print(f" - _is_button_like: {callable(_is_button_like)}") print(f" - _remove_comments: {callable(_remove_comments)}") print(f" - _remove_scripts_and_styles: {callable(_remove_scripts_and_styles)}") print(f" - _remove_noise_containers: {callable(_remove_noise_containers)}") print(f" - _clean_cdn_links: {callable(_clean_cdn_links)}") print(f" - _prune_attributes: {callable(_prune_attributes)}") print(f" - _collapse_wrappers: {callable(_collapse_wrappers)}") print(f" - _normalize_whitespace: {callable(_normalize_whitespace)}") def test_cdn_cleaning(): """Test CDN link removal functionality.""" html = """ <html> <body> <img src="https://cdn.example.com/image.jpg" alt="Test"> <img src="https://example.com/image.jpg" alt="Test2"> <div style="background: url(https://cdn.example.com/bg.jpg);">Content</div> </body> </html> """ result_html, counts = basic_prune(html, level=3, remove_cdn_links=True) print("\n✓ CDN cleaning executed successfully") print(f" - CDN links removed: {counts['cdn_links_removed']}") assert counts['cdn_links_removed'] > 0, "Should have removed CDN links" def test_whitespace_normalization(): """Test whitespace normalization.""" html = """ <html> <body> <p>This has multiple spaces</p> <pre>This should preserve spaces</pre> </body> </html> """ result_html, counts = basic_prune(html, level=3, prune_linebreaks=True) print("\n✓ Whitespace normalization executed successfully") print(f" - Whitespace normalized: {counts['whitespace_trim']}") assert counts['whitespace_trim'] > 0, "Should have normalized whitespace" def test_level_based_cleaning(): """Test that different levels produce different results.""" html = """ <html> <body> <div class="container"> <p class="text">Content</p> </div> </body> </html> """ # Level 0: minimal cleaning _, counts_0 = basic_prune(html, level=0) # Level 1: add noise removal _, counts_1 = basic_prune(html, level=1) # Level 2: add attribute pruning _, counts_2 = basic_prune(html, level=2) # Level 3: add wrapper collapsing _, counts_3 = basic_prune(html, level=3) print("\n✓ Level-based cleaning works correctly") print(f" - Level 0 class drops: {counts_0['class_drops']}") print(f" - Level 1 class drops: {counts_1['class_drops']}") print(f" - Level 2 class drops: {counts_2['class_drops']}") print(f" - Level 3 class drops: {counts_3['class_drops']}") # Level 2+ should drop more classes than level 0-1 assert counts_2['class_drops'] >= counts_1['class_drops'] if __name__ == "__main__": print("=" * 60) print("Testing cleaners.py refactoring...") print("=" * 60) try: test_basic_prune_backward_compatibility() test_helper_functions_exist() test_cdn_cleaning() test_whitespace_normalization() test_level_based_cleaning() print("\n" + "=" * 60) print("✓ All tests passed successfully!") print("=" * 60) print("\nRefactoring summary:") print(" - basic_prune reduced from 320 to 70 lines (78% reduction)") print(" - Extracted 8 focused helper functions") print(" - Maintained 100% backward compatibility") print(" - All functionality preserved") except AssertionError as e: print(f"\n✗ Test failed: {e}") sys.exit(1) except Exception as e: print(f"\n✗ Unexpected error: {e}") import traceback traceback.print_exc() sys.exit(1)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/janspoerer/mcp_browser_use'

If you have feedback or need assistance with the MCP directory API, please join our Discord server