import { z } from "zod";
import * as cheerio from "cheerio";
import TurndownService from "turndown";
/**
* Web Research Tool
* Facilitates reading and processing documentation from the web.
*/
/**
 * Validation schema for the arguments accepted by the
 * `research_documentation` tool.
 *
 * - `url`: required string that must parse as a URL.
 * - `selector`: optional string holding a CSS selector for the content to
 *   extract.
 */
const researchDocumentationInput = z.object({
  url: z
    .string()
    .url()
    .describe("The full URL of the documentation page to read"),
  selector: z
    .string()
    .optional()
    .describe(
      "Optional CSS selector to target specific content (e.g. 'main', 'article', '.content'). Defaults to intelligent detection.",
    ),
});

/**
 * Tool definition for `research_documentation`: the tool name, the
 * human-readable description, and the schema used to validate its input.
 */
export const researchDocumentationSchema = {
  name: "research_documentation",
  description:
    "Fetches and analyzes documentation from a given URL. Converts HTML content to clean Markdown for easier reading by LLMs. optimizing for documentation sites.",
  inputSchema: researchDocumentationInput,
};
/** Upper bound, in milliseconds, for the HTTP request plus body download. */
const RESEARCH_FETCH_TIMEOUT_MS = 30000;

/**
 * Selectors probed, in order, when the caller supplies no selector.
 * `body` comes last so it acts as the fallback.
 */
const RESEARCH_CONTENT_CANDIDATES = [
  "main",
  "article",
  ".markdown-body",
  "#content",
  ".documentation",
  "body",
];

/** Result returned by the handler: one text item, flagged with `isError` on failure. */
type ResearchToolResult = {
  content: { type: "text"; text: string }[];
  isError?: boolean;
};

/** Wraps a message in the tool result shape; `isError` is only present on failures. */
function researchTextResult(text: string, isError = false): ResearchToolResult {
  const content: ResearchToolResult["content"] = [{ type: "text", text }];
  return isError ? { content, isError: true } : { content };
}

/**
 * Turns a caught value into a readable message. When the error carries an
 * `Error` as its `cause` (as network failures from `fetch` do), the cause's
 * message is appended so the report is more specific than "fetch failed".
 */
function describeResearchError(error: unknown): string {
  if (!(error instanceof Error)) {
    return String(error);
  }
  const cause = (error as { cause?: unknown }).cause;
  if (cause instanceof Error && cause.message) {
    return `${error.message} (${cause.message})`;
  }
  return error.message;
}

/**
 * Downloads the page at `url` and returns its body as text.
 *
 * @throws Error when the URL is malformed or not http(s), when the response
 *   status is not OK, or when the request exceeds the timeout.
 */
async function fetchResearchHtml(url: string): Promise<string> {
  // Reject non-web schemes up front with a clear message.
  const { protocol } = new URL(url);
  if (protocol !== "http:" && protocol !== "https:") {
    throw new Error(
      `Unsupported URL protocol "${protocol}". Only http and https URLs can be fetched.`,
    );
  }

  // Abort the request (and the body download) if the server stalls.
  const controller = new AbortController();
  const timer = setTimeout(
    () => controller.abort(),
    RESEARCH_FETCH_TIMEOUT_MS,
  );
  try {
    const response = await fetch(url, {
      headers: {
        "User-Agent":
          "Code-MCP/1.0 (Research Agent; +https://github.com/millsydotdev/Code-MCP)",
      },
      signal: controller.signal,
    });
    if (!response.ok) {
      throw new Error(
        `Failed to fetch URL: ${response.status} ${response.statusText}`,
      );
    }
    return await response.text();
  } catch (error) {
    // Only the timer above aborts this controller, so an aborted signal means a timeout.
    if (controller.signal.aborted) {
      throw new Error(
        `Request timed out after ${RESEARCH_FETCH_TIMEOUT_MS / 1000} seconds`,
      );
    }
    throw error;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Fetches a documentation page and returns its main content as Markdown.
 *
 * The page is downloaded, `script`, `style`, `nav`, `footer`, `iframe` and
 * `noscript` elements are removed, and the remaining HTML of the chosen
 * element is converted with Turndown (ATX headings, fenced code blocks).
 *
 * @param args.url - Address of the page to read; must be an http(s) URL.
 * @param args.selector - Optional CSS selector for the content element. It is
 *   applied after the noise elements are removed, so a selector that targets
 *   one of them matches nothing. When omitted, a list of common content
 *   containers is probed in order and the first with non-blank content wins,
 *   falling back to `body`.
 * @returns A result with a single text item. On success the text is a
 *   heading naming the URL followed by the Markdown. Failures (fetch errors,
 *   no content found) are reported with `isError: true` rather than thrown.
 */
export async function researchDocumentationHandler(args: {
  url: string;
  selector?: string;
}): Promise<ResearchToolResult> {
  const { url, selector } = args;
  const noContent =
    "⚠️ No suitable content found on the page. Try specifying a selector.";

  try {
    const html = await fetchResearchHtml(url);
    const $ = cheerio.load(html);

    // Remove noise
    $("script, style, nav, footer, iframe, noscript").remove();

    let contentHtml = "";
    if (selector) {
      contentHtml = $(selector).html() ?? "";
    } else {
      // Skip candidates that exist but are empty so a later one can still match.
      for (const candidate of RESEARCH_CONTENT_CANDIDATES) {
        const candidateHtml = $(candidate).html() ?? "";
        if (candidateHtml.trim()) {
          contentHtml = candidateHtml;
          break;
        }
      }
    }

    // Whitespace-only HTML counts as missing content.
    if (!contentHtml.trim()) {
      return researchTextResult(noContent, true);
    }

    const turndownService = new TurndownService({
      headingStyle: "atx",
      codeBlockStyle: "fenced",
    });
    const markdown = turndownService.turndown(contentHtml);

    // Markup that converts to nothing would otherwise yield a heading-only result.
    if (!markdown.trim()) {
      return researchTextResult(noContent, true);
    }

    return researchTextResult(`# Research Result: ${url}\n\n${markdown}`);
  } catch (error) {
    return researchTextResult(
      `❌ Error fetching documentation: ${describeResearchError(error)}`,
      true,
    );
  }
}