Skip to main content
Glama
davinoishi

Broken Link Checker MCP Server

by davinoishi
test_link_extractor.py (5.44 kB)
"""Tests for link extractor.""" import pytest from src.link_extractor import LinkExtractor class TestLinkExtractor: """Test the LinkExtractor class.""" def setup_method(self): """Set up test fixtures.""" self.extractor = LinkExtractor() def test_extract_hyperlinks(self): """Test extraction of hyperlinks.""" html = """ <html> <body> <a href="https://example.com">Example</a> <a href="/relative/path">Relative Link</a> </body> </html> """ base_url = "https://test.com" links = self.extractor.extract_links(html, base_url) # Find hyperlinks hyperlinks = [l for l in links if l.element_type == "a"] assert len(hyperlinks) == 2 assert hyperlinks[0].url == "https://example.com" assert hyperlinks[0].reference == "Example" assert hyperlinks[1].url == "https://test.com/relative/path" def test_extract_images(self): """Test extraction of image sources.""" html = """ <html> <body> <img src="https://example.com/image.png" alt="Test Image"> <img src="/images/local.jpg"> </body> </html> """ base_url = "https://test.com" links = self.extractor.extract_links(html, base_url) images = [l for l in links if l.element_type == "img"] assert len(images) == 2 assert images[0].url == "https://example.com/image.png" assert 'alt="Test Image"' in images[0].reference assert images[1].url == "https://test.com/images/local.jpg" def test_extract_scripts(self): """Test extraction of script sources.""" html = """ <html> <head> <script src="https://cdn.example.com/script.js"></script> <script src="/js/app.js"></script> </head> </html> """ base_url = "https://test.com" links = self.extractor.extract_links(html, base_url) scripts = [l for l in links if l.element_type == "script"] assert len(scripts) == 2 assert scripts[0].url == "https://cdn.example.com/script.js" assert scripts[1].url == "https://test.com/js/app.js" def test_extract_stylesheets(self): """Test extraction of stylesheet links.""" html = """ <html> <head> <link rel="stylesheet" href="https://cdn.example.com/style.css"> <link rel="stylesheet" 
href="/css/main.css"> </head> </html> """ base_url = "https://test.com" links = self.extractor.extract_links(html, base_url) stylesheets = [l for l in links if l.element_type == "link"] assert len(stylesheets) == 2 assert stylesheets[0].url == "https://cdn.example.com/style.css" assert stylesheets[1].url == "https://test.com/css/main.css" def test_extract_video(self): """Test extraction of video sources.""" html = """ <html> <body> <video src="https://example.com/video.mp4"></video> <video> <source src="/videos/local.mp4"> </video> </body> </html> """ base_url = "https://test.com" links = self.extractor.extract_links(html, base_url) videos = [l for l in links if l.element_type == "video"] assert len(videos) == 2 assert videos[0].url == "https://example.com/video.mp4" assert videos[1].url == "https://test.com/videos/local.mp4" def test_extract_audio(self): """Test extraction of audio sources.""" html = """ <html> <body> <audio src="https://example.com/audio.mp3"></audio> <audio> <source src="/audio/local.mp3"> </audio> </body> </html> """ base_url = "https://test.com" links = self.extractor.extract_links(html, base_url) audios = [l for l in links if l.element_type == "audio"] assert len(audios) == 2 assert audios[0].url == "https://example.com/audio.mp3" assert audios[1].url == "https://test.com/audio/local.mp3" def test_extract_iframe(self): """Test extraction of iframe sources.""" html = """ <html> <body> <iframe src="https://example.com/embed"></iframe> </body> </html> """ base_url = "https://test.com" links = self.extractor.extract_links(html, base_url) iframes = [l for l in links if l.element_type == "iframe"] assert len(iframes) == 1 assert iframes[0].url == "https://example.com/embed" def test_is_internal_link(self): """Test internal link detection.""" assert self.extractor.is_internal_link( "https://example.com/page", "https://example.com" ) assert self.extractor.is_internal_link( "https://example.com/page", "https://example.com/" ) assert not 
self.extractor.is_internal_link( "https://other.com/page", "https://example.com" ) assert not self.extractor.is_internal_link( "https://sub.example.com/page", "https://example.com" )

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/davinoishi/BLC-ground'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.