Skip to main content
Glama

Broken Link Checker MCP Server

by davinoishi
server.js (8.22 kB)
#!/usr/bin/env node
/**
 * Broken Link Checker MCP server.
 *
 * Exposes two MCP tools over an HTTP + SSE transport:
 *   - check_page_links: check every link found on a single HTML page
 *   - check_site:       crawl a site and check the links on every page found
 *
 * HTTP routes:
 *   GET  /health   liveness probe
 *   GET  /sse      opens an SSE stream and starts an MCP session
 *   POST /message  receives JSON-RPC messages for an existing session
 */
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";
import express from "express";
import cors from "cors";
import blc from "broken-link-checker";

const { HtmlUrlChecker, SiteChecker } = blc;

// Configuration
const PORT = process.env.PORT || 3000;
const HOST = process.env.HOST || "0.0.0.0";

// Create Express app
const app = express();
app.use(cors());
app.use(express.json());

// Tool catalogue returned by the ListTools handler.
const TOOLS = [
  {
    name: "check_page_links",
    description:
      "Check all links on a single HTML page for broken links. Returns detailed information about each link found including broken status, HTTP status codes, and reasons for failure.",
    inputSchema: {
      type: "object",
      properties: {
        url: {
          type: "string",
          description: "The URL of the page to check for broken links",
        },
        excludeExternalLinks: {
          type: "boolean",
          description: "If true, only check internal links (default: false)",
          default: false,
        },
        honorRobotExclusions: {
          type: "boolean",
          description:
            "If true, respect robots.txt and meta robots tags (default: true)",
          default: true,
        },
      },
      required: ["url"],
    },
  },
  {
    name: "check_site",
    description:
      "Recursively crawl and check all links across an entire website. This will scan multiple pages and check all internal and external links found. Use with caution on large sites as it may take significant time.",
    inputSchema: {
      type: "object",
      properties: {
        url: {
          type: "string",
          description: "The starting URL of the site to check",
        },
        excludeExternalLinks: {
          type: "boolean",
          description: "If true, only check internal links (default: false)",
          default: false,
        },
        honorRobotExclusions: {
          type: "boolean",
          description:
            "If true, respect robots.txt and meta robots tags (default: true)",
          default: true,
        },
        maxSocketsPerHost: {
          type: "number",
          description: "Maximum concurrent requests per host (default: 1)",
          default: 1,
        },
      },
      required: ["url"],
    },
  },
];

/**
 * Reduce a broken-link-checker link result to the plain, JSON-serialisable
 * fields reported to MCP clients.
 *
 * @param {object} result - Link result passed to the checker's `link` handler.
 * @returns {object} Flattened link description.
 */
function formatLinkResult(result) {
  return {
    url: result.url.resolved,
    base: result.base.resolved,
    html: {
      tagName: result.html.tagName,
      text: result.html.text,
    },
    broken: result.broken,
    brokenReason: result.brokenReason,
    excluded: result.excluded,
    excludedReason: result.excludedReason,
    http: {
      statusCode: result.http?.response?.statusCode,
    },
  };
}

/**
 * Add a URL to a checker's queue, rejecting the surrounding promise when the
 * checker refuses it. `enqueue()` reports failure by returning an Error
 * instead of throwing; without this check the `end` handler would never fire
 * and the promise would stay pending forever.
 *
 * @param {{ enqueue: Function }} checker - HtmlUrlChecker or SiteChecker.
 * @param {string} url - URL to enqueue.
 * @param {(reason: Error) => void} reject - Reject callback of the promise.
 */
function enqueueOrReject(checker, url, reject) {
  const queued = checker.enqueue(url);
  if (queued instanceof Error) {
    reject(queued);
  }
}

/**
 * Check every link found on a single page.
 *
 * @param {string} url - Page to scan.
 * @param {object} [options] - Options forwarded to HtmlUrlChecker.
 * @returns {Promise<{results: object[], errors: object[]}>} Link results plus
 *   any error reported while retrieving the page itself.
 */
function checkPageLinks(url, options = {}) {
  return new Promise((resolve, reject) => {
    const results = [];
    const errors = [];

    const htmlChecker = new HtmlUrlChecker(options, {
      link: (result) => {
        results.push(formatLinkResult(result));
      },
      // Fired once per page; `error` is set when the page could not be
      // retrieved or parsed.
      page: (error, pageUrl) => {
        if (error) {
          errors.push({ pageUrl, error: error.message });
        }
      },
      // HtmlUrlChecker signals an empty queue with `end`. (`complete` is an
      // HtmlChecker handler and is never invoked here.)
      end: () => {
        resolve({ results, errors });
      },
    });

    enqueueOrReject(htmlChecker, url, reject);
  });
}

/**
 * Crawl a site and check the links of every page that is discovered.
 *
 * @param {string} url - Starting URL of the crawl.
 * @param {object} [options] - Options forwarded to SiteChecker.
 * @returns {Promise<{results: object[], errors: object[], pages: string[]}>}
 *   Link results, per-page errors and the list of pages scanned successfully.
 */
function checkSite(url, options = {}) {
  return new Promise((resolve, reject) => {
    const results = [];
    const errors = [];
    const pages = [];

    const siteChecker = new SiteChecker(options, {
      link: (result) => {
        results.push(formatLinkResult(result));
      },
      page: (error, pageUrl) => {
        if (error) {
          errors.push({ pageUrl, error: error.message });
        } else {
          pages.push(pageUrl);
        }
      },
      end: () => {
        resolve({ results, errors, pages });
      },
    });

    enqueueOrReject(siteChecker, url, reject);
  });
}

/**
 * Validate the `url` tool argument.
 *
 * @param {unknown} url - Value supplied by the client.
 * @returns {string} The URL, unchanged.
 * @throws {Error} When the value is not a non-empty string.
 */
function requireUrl(url) {
  if (typeof url !== "string" || url.trim() === "") {
    throw new Error('The "url" argument is required and must be a non-empty string');
  }
  return url;
}

/**
 * Wrap a value as a pretty-printed JSON text tool result.
 *
 * @param {object} payload - Data to serialise.
 * @returns {{content: {type: string, text: string}[]}} MCP tool result.
 */
function jsonToolResult(payload) {
  return {
    content: [
      {
        type: "text",
        text: JSON.stringify(payload, null, 2),
      },
    ],
  };
}

/**
 * Execute one of the tools by name.
 *
 * @param {string} name - Tool name from the request.
 * @param {object} args - Tool arguments from the request.
 * @returns {Promise<object>} MCP tool result.
 * @throws {Error} On an unknown tool, invalid arguments or checker failure.
 */
async function runTool(name, args) {
  if (name === "check_page_links") {
    const options = {
      excludeExternalLinks: args.excludeExternalLinks || false,
      honorRobotExclusions: args.honorRobotExclusions !== false,
    };

    const result = await checkPageLinks(requireUrl(args.url), options);
    const brokenLinks = result.results.filter((link) => link.broken);

    return jsonToolResult({
      summary: {
        totalLinks: result.results.length,
        brokenLinks: brokenLinks.length,
        workingLinks: result.results.length - brokenLinks.length,
        errors: result.errors.length,
      },
      brokenLinks,
      allLinks: result.results,
      // Surface page-level failures so an unreachable page is not reported
      // as "0 links, 0 broken".
      errors: result.errors,
    });
  }

  if (name === "check_site") {
    const options = {
      excludeExternalLinks: args.excludeExternalLinks || false,
      honorRobotExclusions: args.honorRobotExclusions !== false,
      maxSocketsPerHost: args.maxSocketsPerHost || 1,
    };

    const result = await checkSite(requireUrl(args.url), options);
    const brokenLinks = result.results.filter((link) => link.broken);

    return jsonToolResult({
      summary: {
        pagesScanned: result.pages.length,
        totalLinks: result.results.length,
        brokenLinks: brokenLinks.length,
        workingLinks: result.results.length - brokenLinks.length,
        errors: result.errors.length,
      },
      brokenLinks,
      pages: result.pages,
      errors: result.errors,
    });
  }

  throw new Error(`Unknown tool: ${name}`);
}

/**
 * Build an MCP server with the tool handlers registered.
 *
 * A fresh instance is created for every SSE connection so that concurrent
 * clients never share one server-to-transport binding.
 * NOTE(review): assumes an SDK Server holds a single active transport —
 * confirm against the installed SDK version.
 *
 * @returns {Server} Configured MCP server, not yet connected.
 */
function createMcpServer() {
  const server = new Server(
    {
      name: "broken-link-checker-mcp",
      version: "1.0.0",
    },
    {
      capabilities: {
        tools: {},
      },
    }
  );

  // List available tools
  server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));

  // Handle tool calls. Failures are reported as tool results flagged with
  // `isError` rather than thrown.
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    // `arguments` may be omitted by the client; default it so property
    // reads below cannot throw a TypeError.
    const { name, arguments: args = {} } = request.params;

    try {
      return await runTool(name, args);
    } catch (error) {
      return {
        content: [
          {
            type: "text",
            text: `Error: ${error.message}`,
          },
        ],
        isError: true,
      };
    }
  });

  return server;
}

// Active SSE transports keyed by session id, so that POSTed messages can be
// routed to the connection they belong to.
const transports = new Map();

// Health check endpoint
app.get("/health", (req, res) => {
  res.json({ status: "ok", service: "broken-link-checker-mcp" });
});

// SSE endpoint for MCP
app.get("/sse", async (req, res) => {
  console.log("New SSE connection established");

  const transport = new SSEServerTransport("/message", res);
  const { sessionId } = transport;
  transports.set(sessionId, transport);

  // Handle client disconnect
  req.on("close", () => {
    console.log("SSE connection closed");
    transports.delete(sessionId);
  });

  try {
    await createMcpServer().connect(transport);
  } catch (error) {
    console.error("Failed to start MCP session:", error);
    transports.delete(sessionId);
    if (res.headersSent) {
      res.end();
    } else {
      res.sendStatus(500);
    }
  }
});

// POST endpoint for MCP messages
app.post("/message", async (req, res) => {
  const { sessionId } = req.query;
  const transport =
    typeof sessionId === "string" ? transports.get(sessionId) : undefined;

  if (!transport) {
    res.status(400).json({ error: "Unknown or missing sessionId" });
    return;
  }

  try {
    // express.json() has already consumed the request stream, so hand the
    // parsed body to the transport instead of letting it re-read the stream.
    await transport.handlePostMessage(req, res, req.body);
  } catch (error) {
    console.error("Failed to handle MCP message:", error);
    if (!res.headersSent) {
      res.sendStatus(500);
    }
  }
});

/**
 * Start the HTTP server.
 *
 * @returns {Promise<import("node:http").Server>} Resolves once the server is
 *   listening; rejects if binding fails (for example, the port is in use).
 */
async function main() {
  return new Promise((resolve, reject) => {
    const httpServer = app.listen(PORT, HOST, () => {
      console.log(`Broken Link Checker MCP Server running on http://${HOST}:${PORT}`);
      console.log(`SSE endpoint: http://${HOST}:${PORT}/sse`);
      console.log(`Health check: http://${HOST}:${PORT}/health`);
      resolve(httpServer);
    });
    // Listen failures are emitted as events, not thrown; route them into the
    // promise so the fatal-error handler below sees them.
    httpServer.once("error", reject);
  });
}

main().catch((error) => {
  console.error("Fatal error in main():", error);
  process.exit(1);
});

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/davinoishi/broken-link-checker-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.