"""Doc Scraper MCP Server (mcp_doc_scraper).

An MCP server exposing a single `scrape_docs` tool that fetches a URL
rendered to markdown via the r.jina.ai reader proxy and saves the result
to a local file.
"""
import logging
import os
import aiohttp
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import TextContent, Tool
from pydantic import BaseModel
class ScrapeDocsInput(BaseModel):
    """Input schema for the `scrape_docs` tool."""

    # URL of the documentation page to fetch (prefixed with the
    # r.jina.ai reader proxy by the tool handler).
    url: str
    # Local filesystem path where the scraped markdown is written.
    output_path: str
async def serve() -> None:
    """Run the doc-scraper MCP server over stdio.

    Registers a single ``scrape_docs`` tool that fetches a URL rendered
    to markdown via the r.jina.ai reader proxy and writes the content to
    a local file, then serves requests until the stdio streams close.
    """
    logger = logging.getLogger(__name__)
    server = Server("doc-scraper")

    @server.list_tools()
    async def list_tools() -> list[Tool]:
        return [
            Tool(
                name="scrape_docs",
                description="Scrape documentation from a URL and save as markdown",
                # model_json_schema() is the pydantic v2 API; the v1-style
                # .schema() is deprecated and removed in newer releases.
                inputSchema=ScrapeDocsInput.model_json_schema(),
            )
        ]

    @server.call_tool()
    async def call_tool(name: str, arguments: dict) -> list[TextContent]:
        if name != "scrape_docs":
            raise ValueError(f"Unknown tool: {name}")
        url = arguments["url"]
        output_path = arguments["output_path"]
        try:
            # Use jina.ai to convert URL to markdown
            jina_url = f"https://r.jina.ai/{url}"
            async with aiohttp.ClientSession() as session:
                async with session.get(jina_url) as response:
                    if response.status != 200:
                        return [
                            TextContent(
                                type="text",
                                text=f"Failed to fetch content: {response.status}",
                            )
                        ]
                    content = await response.text()
            # Ensure output directory exists. dirname() is "" for a bare
            # filename, and makedirs("") raises FileNotFoundError, so only
            # create directories when there is a directory component.
            out_dir = os.path.dirname(output_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            # Save markdown content
            with open(output_path, "w", encoding="utf-8") as f:
                f.write(content)
            return [
                TextContent(
                    type="text",
                    text=f"Successfully scraped docs from {url} and saved to {output_path}",
                )
            ]
        except Exception as e:
            # Tool errors are reported back to the client as text rather
            # than crashing the server; full traceback goes to the log.
            logger.exception("Error while scraping documentation.")
            return [TextContent(type="text", text=f"Error: {str(e)}")]

    options = server.create_initialization_options()
    async with stdio_server() as (read_stream, write_stream):
        await server.run(read_stream, write_stream, options, raise_exceptions=True)