Broken Link Checker MCP Server

by davinoishi
server.py (4.77 kB)
"""MCP server for broken link checker.""" import logging from fastmcp import FastMCP from .checker import LinkChecker from .models import CheckResult # Configure logging logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) logger = logging.getLogger(__name__) # Initialize FastMCP server mcp = FastMCP("Broken Link Checker", version="0.1.0") # Initialize the link checker checker = LinkChecker(timeout=10.0, rate_limit_delay=1.0) @mcp.tool() async def check_page(url: str) -> dict: """ Check all links on a single web page. This tool scans a single page and validates all links found on it, including: - Hyperlinks (a href) - Images (img src) - Scripts (script src) - Stylesheets (link href) - Media elements (video, audio, iframe) Args: url: The URL of the page to check (must be a valid HTTP/HTTPS URL) Returns: A dictionary containing: - results: List of link validation results with page_url, link_reference, link_url, and status ("Good" or "Bad") - summary: Statistics including total_links, good_links, bad_links, and pages_scanned Examples: >>> check_page("https://example.com") { "results": [ { "page_url": "https://example.com", "link_reference": "About Us", "link_url": "https://example.com/about", "status": "Good" } ], "summary": { "total_links": 15, "good_links": 14, "bad_links": 1, "pages_scanned": 1 } } """ try: logger.info(f"Received check_page request for: {url}") result = await checker.check_page(url) logger.info( f"Completed check_page for {url}: {result.summary.total_links} links checked" ) return result.model_dump() except Exception as e: logger.error(f"Error checking page {url}: {e}", exc_info=True) raise @mcp.tool() async def check_domain(url: str, max_depth: int = -1) -> dict: """ Recursively check all pages within a domain for broken links. This tool crawls an entire domain, following internal links and validating all links found on each page. It respects robots.txt and implements polite crawling with rate limiting. Args: url: The root URL of the domain to check (e.g., "https://example.com") max_depth: Maximum crawl depth (-1 for unlimited). Depth 0 means only the starting page, depth 1 includes pages linked from it, etc. Default is -1 (unlimited). Returns: A dictionary containing: - results: List of all link validation results across all crawled pages - summary: Statistics including total_links, good_links, bad_links, and pages_scanned Examples: >>> check_domain("https://example.com", max_depth=2) { "results": [ { "page_url": "https://example.com", "link_reference": "Contact", "link_url": "https://example.com/contact", "status": "Good" }, { "page_url": "https://example.com/about", "link_reference": "<img alt='logo'>", "link_url": "https://example.com/images/logo.png", "status": "Good" } ], "summary": { "total_links": 150, "good_links": 145, "bad_links": 5, "pages_scanned": 10 } } Note: - Only follows hyperlinks for crawling (not images, scripts, etc.) 
- Respects robots.txt directives - Applies rate limiting to be polite to servers - Can take significant time for large sites """ try: logger.info(f"Received check_domain request for: {url} (max_depth: {max_depth})") result = await checker.check_domain(url, max_depth=max_depth) logger.info( f"Completed check_domain for {url}: {result.summary.pages_scanned} pages, " f"{result.summary.total_links} links checked" ) return result.model_dump() except Exception as e: logger.error(f"Error checking domain {url}: {e}", exc_info=True) raise if __name__ == "__main__": # Run the MCP server with HTTP transport logger.info("Starting Broken Link Checker MCP Server on HTTP transport") mcp.run(transport="http", host="127.0.0.1", port=8000)
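
For orientation, here is one plausible shape for the models module referenced by the import above, reconstructed from the field names in the tool docstrings. Only CheckResult is named in the source; the class names LinkResult and CheckSummary are assumptions, and the real definitions may differ.

# Hypothetical sketch of .models, inferred from the docstrings in server.py.
from pydantic import BaseModel


class LinkResult(BaseModel):
    """One validated link (class name assumed; fields mirror the documented results)."""
    page_url: str        # page the link was found on
    link_reference: str  # anchor text or element description, e.g. "<img alt='logo'>"
    link_url: str        # the link target that was checked
    status: str          # "Good" or "Bad"


class CheckSummary(BaseModel):
    """Aggregate statistics (class name assumed; fields mirror the documented summary)."""
    total_links: int
    good_links: int
    bad_links: int
    pages_scanned: int


class CheckResult(BaseModel):
    """Top-level result returned by LinkChecker and serialized via model_dump()."""
    results: list[LinkResult]
    summary: CheckSummary

And a minimal sketch of calling the running server from a FastMCP client. This assumes FastMCP 2.x and that the HTTP transport exposes the default /mcp path on 127.0.0.1:8000; adjust the URL if your transport or path differs.

import asyncio

from fastmcp import Client


async def main() -> None:
    # Connect to the server's HTTP endpoint (started above on port 8000).
    async with Client("http://127.0.0.1:8000/mcp") as client:
        # Invoke the check_page tool with its single required argument.
        result = await client.call_tool("check_page", {"url": "https://example.com"})
        print(result)


if __name__ == "__main__":
    asyncio.run(main())

check_domain can be invoked the same way, passing max_depth alongside url in the arguments dictionary.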

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/davinoishi/BLC-ground'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.