index.js•7.25 kB
#!/usr/bin/env node
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
CallToolRequestSchema,
ListToolsRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";
import blc from "broken-link-checker";
// Pull the two checker constructors used below off the package's default export.
const HtmlUrlChecker = blc.HtmlUrlChecker;
const SiteChecker = blc.SiteChecker;

// Identity this server reports about itself.
const serverInfo = {
  name: "broken-link-checker-mcp",
  version: "1.0.0",
};

// Only the "tools" capability is declared.
const serverOptions = {
  capabilities: {
    tools: {},
  },
};

const server = new Server(serverInfo, serverOptions);
/**
 * Check every link found on a single HTML page.
 *
 * Adapts the callback-based `HtmlUrlChecker` to a Promise.
 *
 * @param {string} url - URL of the page whose links should be checked.
 * @param {object} [options={}] - Options passed through unchanged to `HtmlUrlChecker`.
 * @returns {Promise<{results: object[], errors: {pageUrl: string, error: string}[]}>}
 *   Resolves when the checker reports that its queue is empty. `results` holds
 *   one flattened record per link reported; `errors` holds one entry per page
 *   the checker reported an error for.
 *   Rejects with the `Error` returned by `enqueue()` when the URL cannot be queued.
 */
function checkPageLinks(url, options = {}) {
  return new Promise((resolve, reject) => {
    const results = [];
    const errors = [];
    const htmlChecker = new HtmlUrlChecker(options, {
      link: (result) => {
        results.push({
          url: result.url.resolved,
          base: result.base.resolved,
          html: {
            tagName: result.html.tagName,
            text: result.html.text,
          },
          broken: result.broken,
          brokenReason: result.brokenReason,
          excluded: result.excluded,
          excludedReason: result.excludedReason,
          http: {
            // Optional chaining: no response object may be present for this link.
            statusCode: result.http?.response?.statusCode,
          },
        });
      },
      // Fired once per page; `error` is set when the checker reports a
      // failure for the page itself.
      page: (error, pageUrl) => {
        if (error) {
          errors.push({ pageUrl, error: error.message });
        }
      },
      // HtmlUrlChecker announces an empty queue through `end`. A `complete`
      // handler is an HtmlChecker concept and is never invoked on this class,
      // so resolving from it would leave the promise pending forever.
      end: () => {
        resolve({ results, errors });
      },
    });

    // enqueue() reports an unusable URL by *returning* an Error (it does not
    // throw). Nothing is queued in that case, so `end` would never fire.
    const queued = htmlChecker.enqueue(url);
    if (queued instanceof Error) {
      reject(queued);
    }
  });
}
/**
 * Crawl a site starting at `url` and collect every link result reported.
 *
 * Adapts the callback-based `SiteChecker` to a Promise.
 *
 * @param {string} url - Starting URL of the site to crawl.
 * @param {object} [options={}] - Options passed through unchanged to `SiteChecker`.
 * @returns {Promise<{results: object[], errors: {pageUrl: string, error: string}[], pages: string[]}>}
 *   Resolves when the checker fires `end`. `results` holds one flattened
 *   record per link reported, `pages` lists the pages reported without an
 *   error, and `errors` lists the pages reported with one.
 *   Rejects with the `Error` returned by `enqueue()` when the URL cannot be queued.
 */
function checkSite(url, options = {}) {
  return new Promise((resolve, reject) => {
    const results = [];
    const errors = [];
    const pages = [];
    const siteChecker = new SiteChecker(options, {
      link: (result) => {
        results.push({
          url: result.url.resolved,
          base: result.base.resolved,
          html: {
            tagName: result.html.tagName,
            text: result.html.text,
          },
          broken: result.broken,
          brokenReason: result.brokenReason,
          excluded: result.excluded,
          excludedReason: result.excludedReason,
          http: {
            // Optional chaining: no response object may be present for this link.
            statusCode: result.http?.response?.statusCode,
          },
        });
      },
      // Sort each reported page into "scanned" or "failed".
      page: (error, pageUrl) => {
        if (error) {
          errors.push({ pageUrl, error: error.message });
        } else {
          pages.push(pageUrl);
        }
      },
      end: () => {
        resolve({ results, errors, pages });
      },
    });

    // enqueue() reports an unusable URL by *returning* an Error (it does not
    // throw). Nothing is queued in that case, so `end` would never fire and
    // the promise would stay pending unless it is rejected here.
    const queued = siteChecker.enqueue(url);
    if (queued instanceof Error) {
      reject(queued);
    }
  });
}
// Advertise the tools this server exposes, together with their JSON input schemas.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  // Schema fragments that are identical for both tools.
  const excludeExternalLinks = {
    type: "boolean",
    description: "If true, only check internal links (default: false)",
    default: false,
  };
  const honorRobotExclusions = {
    type: "boolean",
    description:
      "If true, respect robots.txt and meta robots tags (default: true)",
    default: true,
  };

  // Single-page check: only the links found on the given page.
  const checkPageLinksTool = {
    name: "check_page_links",
    description:
      "Check all links on a single HTML page for broken links. Returns detailed information about each link found including broken status, HTTP status codes, and reasons for failure.",
    inputSchema: {
      type: "object",
      properties: {
        url: {
          type: "string",
          description: "The URL of the page to check for broken links",
        },
        excludeExternalLinks,
        honorRobotExclusions,
      },
      required: ["url"],
    },
  };

  // Whole-site crawl: additionally exposes the per-host concurrency setting.
  const checkSiteTool = {
    name: "check_site",
    description:
      "Recursively crawl and check all links across an entire website. This will scan multiple pages and check all internal and external links found. Use with caution on large sites as it may take significant time.",
    inputSchema: {
      type: "object",
      properties: {
        url: {
          type: "string",
          description: "The starting URL of the site to check",
        },
        excludeExternalLinks,
        honorRobotExclusions,
        maxSocketsPerHost: {
          type: "number",
          description: "Maximum concurrent requests per host (default: 1)",
          default: 1,
        },
      },
      required: ["url"],
    },
  };

  return { tools: [checkPageLinksTool, checkSiteTool] };
});
// Handle tool calls.
//
// Dispatches on the tool name, runs the matching checker helper, and returns
// the outcome as one pretty-printed JSON text item. Any failure (unknown tool,
// bad arguments, checker error) is returned as a text item with
// `isError: true` instead of being thrown.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name } = request.params;
  // `arguments` may be omitted from a tool call; fall back to an empty object
  // so the property reads below fail validation instead of throwing a TypeError.
  const args = request.params.arguments ?? {};

  // Validate the URL before handing it to the checker: the checker's
  // enqueue() reports an unusable URL by returning an Error rather than
  // throwing, which is easy to miss and would leave the call without a reply.
  const requireHttpUrl = (value) => {
    if (typeof value !== "string" || value.trim() === "") {
      throw new Error('Missing required argument "url": expected a non-empty string');
    }
    let parsed;
    try {
      parsed = new URL(value);
    } catch {
      throw new Error(`Invalid URL: ${value}`);
    }
    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
      throw new Error(
        `Unsupported URL protocol "${parsed.protocol}": expected http: or https:`
      );
    }
    // Hand back the caller's original string so the checker sees it unmodified.
    return value;
  };

  // Wrap a payload as the single text content item returned to the client.
  const asTextResult = (payload) => ({
    content: [
      {
        type: "text",
        text: JSON.stringify(payload, null, 2),
      },
    ],
  });

  try {
    if (name === "check_page_links") {
      const url = requireHttpUrl(args.url);
      const options = {
        excludeExternalLinks: args.excludeExternalLinks || false,
        // Defaults to true: only an explicit `false` disables robots handling.
        honorRobotExclusions: args.honorRobotExclusions !== false,
      };
      const result = await checkPageLinks(url, options);
      const brokenLinks = result.results.filter((link) => link.broken);
      // Page-level failures (e.g. the page itself could not be loaded) are
      // surfaced so that "0 links" is not mistaken for a clean page.
      const errors = result.errors ?? [];
      const summary = {
        totalLinks: result.results.length,
        brokenLinks: brokenLinks.length,
        workingLinks: result.results.length - brokenLinks.length,
        errors: errors.length,
      };
      return asTextResult({
        summary,
        brokenLinks,
        allLinks: result.results,
        errors,
      });
    } else if (name === "check_site") {
      const url = requireHttpUrl(args.url);
      const options = {
        excludeExternalLinks: args.excludeExternalLinks || false,
        honorRobotExclusions: args.honorRobotExclusions !== false,
        maxSocketsPerHost: args.maxSocketsPerHost || 1,
      };
      const result = await checkSite(url, options);
      const brokenLinks = result.results.filter((link) => link.broken);
      const summary = {
        pagesScanned: result.pages.length,
        totalLinks: result.results.length,
        brokenLinks: brokenLinks.length,
        workingLinks: result.results.length - brokenLinks.length,
        errors: result.errors.length,
      };
      // The full link list is intentionally not included for site scans;
      // only the broken links, scanned pages and page errors are returned.
      return asTextResult({
        summary,
        brokenLinks,
        pages: result.pages,
        errors: result.errors,
      });
    } else {
      throw new Error(`Unknown tool: ${name}`);
    }
  } catch (error) {
    return {
      content: [
        {
          type: "text",
          text: `Error: ${error.message}`,
        },
      ],
      isError: true,
    };
  }
});
// Entry point: connect the server to a stdio transport and announce readiness.
async function main() {
  await server.connect(new StdioServerTransport());
  // Logged with console.error (stderr) — presumably to keep stdout free for
  // the stdio transport; confirm against the SDK if this is changed.
  console.error("Broken Link Checker MCP server running on stdio");
}

// Any startup failure is reported and turned into a non-zero exit code.
main().catch((err) => {
  console.error("Fatal error in main():", err);
  process.exit(1);
});