Doc Scraper MCP Server

  • mcp_doc_scraper
import logging
import os

import aiohttp
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import TextContent, Tool
from pydantic import BaseModel


class ScrapeDocsInput(BaseModel):
    """Input schema advertised for the ``scrape_docs`` tool."""

    # URL of the documentation page to fetch.
    url: str
    # Filesystem path the fetched markdown is written to.
    output_path: str


async def serve() -> None:
    """Run the "doc-scraper" MCP server over stdio.

    Registers a single tool, ``scrape_docs``, which fetches a URL through
    the ``r.jina.ai`` endpoint and writes the response body to a file.
    """
    logger = logging.getLogger(__name__)
    server = Server("doc-scraper")

    @server.list_tools()
    async def list_tools() -> list[Tool]:
        """Return the list of tools exposed by this server."""
        return [
            Tool(
                name="scrape_docs",
                description="Scrape documentation from a URL and save as markdown",
                inputSchema=ScrapeDocsInput.schema(),
            )
        ]

    @server.call_tool()
    async def call_tool(name: str, arguments: dict) -> list[TextContent]:
        """Handle a tool invocation.

        Args:
            name: Tool name; only ``"scrape_docs"`` is accepted.
            arguments: Mapping that must contain ``"url"`` and
                ``"output_path"``.

        Returns:
            A single-element list with a text message describing success,
            a non-200 HTTP status, or the error that occurred.

        Raises:
            ValueError: If ``name`` is not ``"scrape_docs"``.
            KeyError: If ``"url"`` or ``"output_path"`` is missing.
        """
        if name != "scrape_docs":
            raise ValueError(f"Unknown tool: {name}")

        url = arguments["url"]
        output_path = arguments["output_path"]

        try:
            # Use jina.ai to convert URL to markdown
            jina_url = f"https://r.jina.ai/{url}"

            async with aiohttp.ClientSession() as session:
                async with session.get(jina_url) as response:
                    if response.status != 200:
                        return [
                            TextContent(
                                type="text",
                                text=f"Failed to fetch content: {response.status}",
                            )
                        ]

                    content = await response.text()

                    # Ensure output directory exists. A bare filename has an
                    # empty dirname, and os.makedirs("") raises
                    # FileNotFoundError, so only create a directory when the
                    # path actually names one.
                    parent_dir = os.path.dirname(output_path)
                    if parent_dir:
                        os.makedirs(parent_dir, exist_ok=True)

                    # Save markdown content
                    with open(output_path, "w", encoding="utf-8") as f:
                        f.write(content)

                    return [
                        TextContent(
                            type="text",
                            text=f"Successfully scraped docs from {url} and saved to {output_path}",
                        )
                    ]

        except Exception as e:
            # Tool boundary: log the traceback and report the failure to the
            # client as text instead of propagating it.
            logger.exception("Error while scraping documentation.")
            return [TextContent(type="text", text=f"Error: {str(e)}")]

    options = server.create_initialization_options()
    async with stdio_server() as (read_stream, write_stream):
        await server.run(read_stream, write_stream, options, raise_exceptions=True)