MCP Windows Website Downloader Server

  • src
    • mcp_windows_website_downloader
""" MCP Website Downloader - Windows Downloads websites and their assets with proper Windows path handling """ import os from pathlib import Path import asyncio import aiohttp from bs4 import BeautifulSoup from pathlib import Path import logging from typing import Any, Dict from mcp.server.models import InitializationOptions import mcp.types as types from mcp.server import NotificationOptions, Server import mcp.server.stdio from urllib.parse import urljoin, urlparse logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger('website_downloader') class WebsiteDownloader: def __init__(self, save_dir: Path): self.save_dir = save_dir self.session = None async def init_session(self): if not self.session: self.session = aiohttp.ClientSession() async def close(self): if self.session: await self.session.close() def _clean_filename(self, url: str) -> str: """Create valid Windows filename from URL""" parsed = urlparse(url) name = parsed.netloc + parsed.path.replace('/', '_') # Remove invalid Windows filename chars return ''.join(c for c in name if c not in '<>:"/\\|?*') async def download(self, url: str, include_assets: bool = True) -> Dict[str, Any]: await self.init_session() if not self.session: return {"status": "error", "message": "Failed to initialize session"} try: # Download main page async with self.session.get(url) as response: if response.status != 200: return { "status": "error", "message": f"HTTP {response.status}: {response.reason}" } content = await response.text() # Setup save directory site_dir = self.save_dir / self._clean_filename(url) site_dir.mkdir(parents=True, exist_ok=True) soup = BeautifulSoup(content, 'html.parser') assets = [] if include_assets: # Handle images os.makedirs(site_dir / 'images', exist_ok=True) for img in soup.find_all('img', src=True): try: src = urljoin(url, img['src']) filename = self._clean_filename(src) save_path = site_dir / 'images' / filename async with self.session.get(src) as resp: if resp.status == 200: with open(save_path, 'wb') as f: f.write(await resp.read()) img['src'] = f'images/{filename}' assets.append(src) except Exception as e: logger.warning(f"Failed to download image {src}: {e}") # Handle CSS os.makedirs(site_dir / 'css', exist_ok=True) for css in soup.find_all('link', rel='stylesheet', href=True): try: href = urljoin(url, css['href']) filename = self._clean_filename(href) save_path = site_dir / 'css' / filename async with self.session.get(href) as resp: if resp.status == 200: with open(save_path, 'w', encoding='utf-8') as f: f.write(await resp.text()) css['href'] = f'css/{filename}' assets.append(href) except Exception as e: logger.warning(f"Failed to download CSS {href}: {e}") # Handle JavaScript os.makedirs(site_dir / 'js', exist_ok=True) for script in soup.find_all('script', src=True): try: src = urljoin(url, script['src']) filename = self._clean_filename(src) save_path = site_dir / 'js' / filename async with self.session.get(src) as resp: if resp.status == 200: with open(save_path, 'w', encoding='utf-8') as f: f.write(await resp.text()) script['src'] = f'js/{filename}' assets.append(src) except Exception as e: logger.warning(f"Failed to download JS {src}: {e}") # Save processed HTML with open(site_dir / 'index.html', 'w', encoding='utf-8') as f: f.write(str(soup)) return { "status": "success", "url": url, "saved_to": str(site_dir), "assets_downloaded": len(assets) } except Exception as e: logger.error(f"Download failed: {e}") return { "status": "error", "url": url, "message": str(e) } def main(): import argparse import os 
parser = argparse.ArgumentParser(description='Website Downloader MCP Server') parser.add_argument('--directory', type=str, default='downloads', help='Directory to save downloaded sites') args = parser.parse_args() save_dir = Path(args.directory).resolve() save_dir.mkdir(parents=True, exist_ok=True) logger.info(f"Saving downloads to: {save_dir}") downloader = WebsiteDownloader(save_dir) server = Server("website-downloader") @server.list_tools() async def handle_list_tools() -> list[types.Tool]: return [ types.Tool( name="download-website", description="Download a website with its assets", inputSchema={ "type": "object", "properties": { "url": { "type": "string", "description": "Website URL to download" }, "include_assets": { "type": "boolean", "description": "Download images, CSS, JS", "default": True } }, "required": ["url"] } ) ] @server.call_tool() async def handle_call_tool( name: str, arguments: Dict[str, Any] | None ) -> list[types.TextContent]: try: if not arguments: raise ValueError("Arguments required") if name == "download-website": result = await downloader.download( arguments["url"], arguments.get("include_assets", True) ) return [types.TextContent( type="text", text=str(result) )] else: raise ValueError(f"Unknown tool: {name}") except Exception as e: logger.error(f"Tool error: {e}") return [types.TextContent( type="text", text=f"Error: {str(e)}" )] async def run_server(): async with mcp.server.stdio.stdio_server() as (read_stream, write_stream): try: await server.run( read_stream, write_stream, InitializationOptions( server_name="website-downloader", server_version="0.1.0", capabilities=server.get_capabilities( notification_options=NotificationOptions(), experimental_capabilities={}, ), ), ) finally: await downloader.close() asyncio.run(run_server()) if __name__ == '__main__': main()
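The downloader class can be exercised on its own as a quick sanity check before wiring it into an MCP client. A minimal sketch, assuming the module above is importable; the import path, save directory, and example URL are placeholders:

```python
import asyncio
from pathlib import Path

# Placeholder import: adjust to wherever the module above lives in your package.
from mcp_windows_website_downloader.server import WebsiteDownloader

async def demo():
    downloader = WebsiteDownloader(Path("downloads"))
    try:
        # Fetch the page plus images/CSS/JS and rewrite links to local copies
        result = await downloader.download("https://example.com", include_assets=True)
        print(result)  # e.g. {'status': 'success', 'url': ..., 'saved_to': ..., 'assets_downloaded': ...}
    finally:
        await downloader.close()

asyncio.run(demo())
```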
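To drive the server over MCP itself, a stdio client from the same `mcp` Python SDK can spawn the process and invoke the `download-website` tool. A sketch under the assumption that the server module is saved as `server.py` (a placeholder name) and that the SDK's standard client helpers are available:

```python
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

# Placeholder command line: point it at wherever the server module lives.
server_params = StdioServerParameters(
    command="python",
    args=["server.py", "--directory", "downloads"],
)

async def main():
    async with stdio_client(server_params) as (read_stream, write_stream):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()

            # The server advertises a single tool
            tools = await session.list_tools()
            print([tool.name for tool in tools.tools])  # ['download-website']

            result = await session.call_tool(
                "download-website",
                {"url": "https://example.com", "include_assets": True},
            )
            # The handler returns one TextContent whose text is the result dict
            print(result.content[0].text)

asyncio.run(main())
```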