server.js•8.22 kB
#!/usr/bin/env node
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";
import express from "express";
import cors from "cors";
import blc from "broken-link-checker";
const { HtmlUrlChecker, SiteChecker } = blc;
// Configuration: environment variables take precedence; otherwise listen on
// port 3000 on all interfaces.
const PORT = process.env.PORT || 3000;
const HOST = process.env.HOST || "0.0.0.0";

// Express app. Middleware order is significant: CORS is registered first,
// then JSON body parsing.
const app = express();
app.use(cors()).use(express.json());

// MCP server: identity first, then the capabilities it advertises (an empty
// `tools` capability object).
const serverInfo = {
  name: "broken-link-checker-mcp",
  version: "1.0.0",
};
const serverOptions = {
  capabilities: { tools: {} },
};
const server = new Server(serverInfo, serverOptions);
/**
 * Check every link found on a single HTML page.
 *
 * Adapts broken-link-checker's callback-driven `HtmlUrlChecker` to a Promise.
 * `HtmlUrlChecker` signals per-page completion through `page` and an empty
 * queue through `end`; it never invokes a `complete` handler (that one belongs
 * to `HtmlChecker`), so the promise is settled from `end`.
 *
 * @param {string} url - URL of the page to scan.
 * @param {object} [options={}] - Options forwarded unchanged to `HtmlUrlChecker`.
 * @returns {Promise<{results: object[], errors: {pageUrl: string, error: string}[]}>}
 *   `results` holds one trimmed-down entry per link reported by the checker;
 *   `errors` holds one entry per page that could not be processed.
 * @throws (rejects) when the checker refuses to queue `url`.
 */
function checkPageLinks(url, options = {}) {
  return new Promise((resolve, reject) => {
    const results = [];
    const errors = [];

    // Keep only the fields the tool reports, not the full checker result.
    const toLinkReport = (result) => ({
      url: result.url.resolved,
      base: result.base.resolved,
      html: {
        tagName: result.html.tagName,
        text: result.html.text,
      },
      broken: result.broken,
      brokenReason: result.brokenReason,
      excluded: result.excluded,
      excludedReason: result.excludedReason,
      http: {
        // `http.response` can be absent, hence the optional chain.
        statusCode: result.http?.response?.statusCode,
      },
    });

    const htmlChecker = new HtmlUrlChecker(options, {
      link: (result) => {
        results.push(toLinkReport(result));
      },
      page: (error, pageUrl) => {
        // A page-level error means the page itself could not be scanned;
        // record it instead of silently reporting zero links.
        if (error) {
          errors.push({ pageUrl, error: error.message });
        }
      },
      end: () => {
        resolve({ results, errors });
      },
    });

    // enqueue() returns an Error (rather than throwing) when it cannot accept
    // the URL; in that case `end` never fires, so reject explicitly.
    const queued = htmlChecker.enqueue(url);
    if (queued instanceof Error) {
      reject(queued);
    }
  });
}
/**
 * Crawl a site starting at `url` and check the links the crawler reports.
 *
 * Adapts broken-link-checker's callback-driven `SiteChecker` to a Promise
 * that settles when the checker's queue is empty (`end`).
 *
 * @param {string} url - Starting URL of the site.
 * @param {object} [options={}] - Options forwarded unchanged to `SiteChecker`.
 * @returns {Promise<{results: object[], errors: {pageUrl: string, error: string}[], pages: string[]}>}
 *   `results` holds one trimmed-down entry per reported link, `pages` the
 *   URLs of pages processed without error, and `errors` the pages that
 *   failed together with the error message.
 * @throws (rejects) when the checker refuses to queue `url`.
 */
function checkSite(url, options = {}) {
  return new Promise((resolve, reject) => {
    const results = [];
    const errors = [];
    const pages = [];

    const siteChecker = new SiteChecker(options, {
      link: (result) => {
        // Keep only the fields the tool reports, not the full checker result.
        results.push({
          url: result.url.resolved,
          base: result.base.resolved,
          html: {
            tagName: result.html.tagName,
            text: result.html.text,
          },
          broken: result.broken,
          brokenReason: result.brokenReason,
          excluded: result.excluded,
          excludedReason: result.excludedReason,
          http: {
            // `http.response` can be absent, hence the optional chain.
            statusCode: result.http?.response?.statusCode,
          },
        });
      },
      page: (error, pageUrl) => {
        if (error) {
          errors.push({ pageUrl, error: error.message });
        } else {
          pages.push(pageUrl);
        }
      },
      end: () => {
        resolve({ results, errors, pages });
      },
    });

    // enqueue() returns an Error (rather than throwing) when it cannot accept
    // the URL. Nothing is queued then, `end` never fires, and without this
    // check the promise would stay pending forever.
    const queued = siteChecker.enqueue(url);
    if (queued instanceof Error) {
      reject(queued);
    }
  });
}
// List available tools
// Advertises the two tools this server implements. The schema objects are
// rebuilt on every request from the small builders below.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  // A property schema with a declared default value.
  const propertyWithDefault = (type, description, defaultValue) => ({
    type,
    description,
    default: defaultValue,
  });

  // The required `url` string property; only its description differs per tool.
  const urlProperty = (description) => ({ type: "string", description });

  // Boolean switches accepted by both tools.
  const sharedFlags = () => ({
    excludeExternalLinks: propertyWithDefault(
      "boolean",
      "If true, only check internal links (default: false)",
      false
    ),
    honorRobotExclusions: propertyWithDefault(
      "boolean",
      "If true, respect robots.txt and meta robots tags (default: true)",
      true
    ),
  });

  // Wrap a property map into a full tool descriptor; `url` is always required.
  const describeTool = (name, description, properties) => ({
    name,
    description,
    inputSchema: {
      type: "object",
      properties,
      required: ["url"],
    },
  });

  const pageTool = describeTool(
    "check_page_links",
    "Check all links on a single HTML page for broken links. Returns detailed information about each link found including broken status, HTTP status codes, and reasons for failure.",
    {
      url: urlProperty("The URL of the page to check for broken links"),
      ...sharedFlags(),
    }
  );

  const siteTool = describeTool(
    "check_site",
    "Recursively crawl and check all links across an entire website. This will scan multiple pages and check all internal and external links found. Use with caution on large sites as it may take significant time.",
    {
      url: urlProperty("The starting URL of the site to check"),
      ...sharedFlags(),
      maxSocketsPerHost: propertyWithDefault(
        "number",
        "Maximum concurrent requests per host (default: 1)",
        1
      ),
    }
  );

  return { tools: [pageTool, siteTool] };
});
/**
 * Validate the `url` tool argument before it is handed to the link checker.
 *
 * @param {unknown} value - Raw `url` argument supplied by the client.
 * @returns {string} The original string, unchanged, once it has been accepted.
 * @throws {Error} If the value is missing, is not a string, cannot be parsed
 *   as an absolute URL, or does not use http/https.
 */
function requireHttpUrl(value) {
  if (typeof value !== "string" || value.trim() === "") {
    throw new Error("The 'url' argument is required and must be a non-empty string");
  }
  let parsed;
  try {
    parsed = new URL(value);
  } catch {
    throw new Error(`Invalid URL: ${value}`);
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw new Error(`Unsupported URL protocol: ${parsed.protocol}`);
  }
  return value;
}

/**
 * Wrap a JSON-serialisable payload as a single pretty-printed text content item.
 *
 * @param {object} payload - Report to serialise.
 * @returns {{content: {type: string, text: string}[]}} Tool result object.
 */
function jsonToolResult(payload) {
  return {
    content: [
      {
        type: "text",
        text: JSON.stringify(payload, null, 2),
      },
    ],
  };
}

// Handle tool calls
// Dispatches on the tool name, runs the matching checker and returns a JSON
// report as text. Any failure is returned as a tool result with
// `isError: true` rather than thrown.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name } = request.params;
  // The client may omit `arguments` entirely; treat that as "no options" so
  // validation below can produce a meaningful message.
  const args = request.params.arguments ?? {};
  try {
    if (name !== "check_page_links" && name !== "check_site") {
      throw new Error(`Unknown tool: ${name}`);
    }

    const url = requireHttpUrl(args.url);
    // Options shared by both tools. honorRobotExclusions defaults to true:
    // only an explicit `false` turns it off.
    const options = {
      excludeExternalLinks: args.excludeExternalLinks || false,
      honorRobotExclusions: args.honorRobotExclusions !== false,
    };

    if (name === "check_page_links") {
      const result = await checkPageLinks(url, options);
      const brokenLinks = result.results.filter((link) => link.broken);
      return jsonToolResult({
        summary: {
          totalLinks: result.results.length,
          brokenLinks: brokenLinks.length,
          workingLinks: result.results.length - brokenLinks.length,
          errors: result.errors.length,
        },
        brokenLinks,
        allLinks: result.results,
        // Page-level failures (e.g. the page itself could not be fetched).
        errors: result.errors,
      });
    }

    // name === "check_site"
    const requestedSockets = args.maxSocketsPerHost;
    // Fall back to 1 for anything that is not a number >= 1.
    const maxSocketsPerHost =
      typeof requestedSockets === "number" && requestedSockets >= 1
        ? Math.floor(requestedSockets)
        : 1;
    const result = await checkSite(url, { ...options, maxSocketsPerHost });
    const brokenLinks = result.results.filter((link) => link.broken);
    return jsonToolResult({
      summary: {
        pagesScanned: result.pages.length,
        totalLinks: result.results.length,
        brokenLinks: brokenLinks.length,
        workingLinks: result.results.length - brokenLinks.length,
        errors: result.errors.length,
      },
      brokenLinks,
      pages: result.pages,
      errors: result.errors,
    });
  } catch (error) {
    // Non-Error throwables have no `.message`; stringify them instead.
    const message = error instanceof Error ? error.message : String(error);
    return {
      content: [
        {
          type: "text",
          text: `Error: ${message}`,
        },
      ],
      isError: true,
    };
  }
});
// Health check endpoint
// Always answers with a fixed JSON body; no dependency is probed.
app.get("/health", (_req, res) => {
  const body = { status: "ok", service: "broken-link-checker-mcp" };
  res.json(body);
});
// Open SSE transports keyed by session id, so a POSTed message can be routed
// to the connection that owns that session.
const sseTransports = new Map();

// SSE endpoint for MCP
// Opens the event stream and attaches it to the MCP server. The transport
// tells the client to POST its messages to "/message".
// NOTE(review): one `server` instance is shared by every connection; confirm
// that the installed SDK version allows connect() to be called once per client.
app.get("/sse", async (req, res) => {
  console.log("New SSE connection established");
  const transport = new SSEServerTransport("/message", res);
  sseTransports.set(transport.sessionId, transport);
  // Handle client disconnect
  req.on("close", () => {
    console.log("SSE connection closed");
    sseTransports.delete(transport.sessionId);
  });
  try {
    await server.connect(transport);
  } catch (error) {
    // Express does not observe the promise returned by an async route
    // handler here, so a failure must be handled explicitly.
    sseTransports.delete(transport.sessionId);
    console.error("Failed to start SSE transport:", error);
    if (res.headersSent) {
      res.end();
    } else {
      res.status(500).end();
    }
  }
});

// POST endpoint for MCP messages
// Forwards each message to the transport of the session named in the
// `sessionId` query parameter; the transport writes the HTTP response.
app.post("/message", async (req, res) => {
  const { sessionId } = req.query;
  const transport =
    typeof sessionId === "string" ? sseTransports.get(sessionId) : undefined;
  if (!transport) {
    res.status(400).send("No active SSE session for the given sessionId");
    return;
  }
  try {
    // express.json() has already consumed the request stream, so hand the
    // parsed body to the transport instead of letting it re-read the stream.
    await transport.handlePostMessage(req, res, req.body);
  } catch (error) {
    console.error("Failed to handle MCP message:", error);
    if (!res.headersSent) {
      res.status(500).send("Failed to handle message");
    }
  }
});
// Start the server
/**
 * Start the HTTP listener and resolve once it is accepting connections.
 *
 * A listen failure (for example the port already being in use) is not thrown
 * by `app.listen`; it arrives as an 'error' event on the returned server or,
 * in some Express versions, as the callback's first argument. Both paths are
 * turned into a rejection so the `.catch` below actually sees them.
 *
 * @returns {Promise<void>} Resolves after the startup banner has been logged.
 */
async function main() {
  await new Promise((resolve, reject) => {
    const httpServer = app.listen(PORT, HOST, (error) => {
      if (error) {
        reject(error);
        return;
      }
      // Startup succeeded: stop intercepting 'error', so later socket errors
      // are not silently swallowed by a settled promise.
      httpServer.off("error", reject);
      console.log(`Broken Link Checker MCP Server running on http://${HOST}:${PORT}`);
      console.log(`SSE endpoint: http://${HOST}:${PORT}/sse`);
      console.log(`Health check: http://${HOST}:${PORT}/health`);
      resolve();
    });
    httpServer.once("error", reject);
  });
}
main().catch((error) => {
  console.error("Fatal error in main():", error);
  process.exit(1);
});