"""MCP server for broken link checker."""
import logging
from fastmcp import FastMCP
from .checker import LinkChecker
from .models import CheckResult
# Configure logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
# Initialize FastMCP server
mcp = FastMCP("Broken Link Checker", version="0.1.0")
# Initialize the link checker
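# (timeout and rate_limit_delay are presumably in seconds: the per-request
# limit and the pause between requests, per LinkChecker's signature in .checker)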
checker = LinkChecker(timeout=10.0, rate_limit_delay=1.0)


@mcp.tool()
async def check_page(url: str) -> dict:
"""
Check all links on a single web page.
This tool scans a single page and validates all links found on it, including:
- Hyperlinks (a href)
- Images (img src)
- Scripts (script src)
- Stylesheets (link href)
- Media elements (video, audio, iframe)
Args:
url: The URL of the page to check (must be a valid HTTP/HTTPS URL)
Returns:
A dictionary containing:
- results: List of link validation results with page_url, link_reference,
link_url, and status ("Good" or "Bad")
- summary: Statistics including total_links, good_links, bad_links,
and pages_scanned
Examples:
>>> check_page("https://example.com")
{
"results": [
{
"page_url": "https://example.com",
"link_reference": "About Us",
"link_url": "https://example.com/about",
"status": "Good"
}
],
"summary": {
"total_links": 15,
"good_links": 14,
"bad_links": 1,
"pages_scanned": 1
}
}
"""
try:
logger.info(f"Received check_page request for: {url}")
        result: CheckResult = await checker.check_page(url)
logger.info(
f"Completed check_page for {url}: {result.summary.total_links} links checked"
)
return result.model_dump()
except Exception as e:
logger.error(f"Error checking page {url}: {e}", exc_info=True)
        raise


@mcp.tool()
async def check_domain(url: str, max_depth: int = -1) -> dict:
"""
Recursively check all pages within a domain for broken links.
This tool crawls an entire domain, following internal links and validating
all links found on each page. It respects robots.txt and implements polite
crawling with rate limiting.
Args:
url: The root URL of the domain to check (e.g., "https://example.com")
max_depth: Maximum crawl depth (-1 for unlimited). Depth 0 means only
the starting page, depth 1 includes pages linked from it, etc.
Default is -1 (unlimited).
Returns:
A dictionary containing:
- results: List of all link validation results across all crawled pages
- summary: Statistics including total_links, good_links, bad_links,
and pages_scanned
Examples:
>>> check_domain("https://example.com", max_depth=2)
{
"results": [
{
"page_url": "https://example.com",
"link_reference": "Contact",
"link_url": "https://example.com/contact",
"status": "Good"
},
{
"page_url": "https://example.com/about",
"link_reference": "<img alt='logo'>",
"link_url": "https://example.com/images/logo.png",
"status": "Good"
}
],
"summary": {
"total_links": 150,
"good_links": 145,
"bad_links": 5,
"pages_scanned": 10
}
}

    Note:
        - Only follows hyperlinks for crawling (not images, scripts, etc.)
        - Respects robots.txt directives
        - Applies rate limiting to be polite to servers
        - Can take significant time for large sites
"""
try:
logger.info(f"Received check_domain request for: {url} (max_depth: {max_depth})")
        result: CheckResult = await checker.check_domain(url, max_depth=max_depth)
logger.info(
f"Completed check_domain for {url}: {result.summary.pages_scanned} pages, "
f"{result.summary.total_links} links checked"
)
return result.model_dump()
except Exception as e:
logger.error(f"Error checking domain {url}: {e}", exc_info=True)
        raise


if __name__ == "__main__":
# Run the MCP server with HTTP transport
logger.info("Starting Broken Link Checker MCP Server on HTTP transport")
mcp.run(transport="http", host="127.0.0.1", port=8000)
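

# A minimal client sketch for exercising this server, assuming fastmcp's bundled
# Client and the default "/mcp" HTTP path (both may differ by fastmcp version):
#
#     import asyncio
#     from fastmcp import Client
#
#     async def main() -> None:
#         async with Client("http://127.0.0.1:8000/mcp") as client:
#             result = await client.call_tool(
#                 "check_page", {"url": "https://example.com"}
#             )
#             print(result)
#
#     asyncio.run(main())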