MCP Windows Website Downloader Server
by hevener10
- src
  - mcp_windows_website_downloader
"""
MCP Website Downloader - Windows
Downloads websites and their assets with proper Windows path handling
"""
import argparse
import asyncio
import json
import logging
from pathlib import Path
from typing import Any, Dict
from urllib.parse import urljoin, urlparse

import aiohttp
from bs4 import BeautifulSoup

import mcp.server.stdio
import mcp.types as types
from mcp.server import NotificationOptions, Server
from mcp.server.models import InitializationOptions
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('website_downloader')


class WebsiteDownloader:
    def __init__(self, save_dir: Path):
        self.save_dir = save_dir
        self.session = None

    async def init_session(self):
        if not self.session:
            self.session = aiohttp.ClientSession()

    async def close(self):
        if self.session:
            await self.session.close()

    def _clean_filename(self, url: str) -> str:
        """Create valid Windows filename from URL"""
        parsed = urlparse(url)
        name = parsed.netloc + parsed.path.replace('/', '_')
        # Remove invalid Windows filename chars
        return ''.join(c for c in name if c not in '<>:"/\\|?*')
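    # Illustrative mapping (hypothetical URL; note the query string is dropped
    # because only netloc + path are used):
    #   "https://example.com/css/site.css?v=2" -> "example.com_css_site.css"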

    async def download(self, url: str, include_assets: bool = True) -> Dict[str, Any]:
        await self.init_session()
        if not self.session:
            return {"status": "error", "message": "Failed to initialize session"}

        try:
            # Download main page
            async with self.session.get(url) as response:
                if response.status != 200:
                    return {
                        "status": "error",
                        "message": f"HTTP {response.status}: {response.reason}"
                    }
                content = await response.text()

            # Setup save directory
            site_dir = self.save_dir / self._clean_filename(url)
            site_dir.mkdir(parents=True, exist_ok=True)

            soup = BeautifulSoup(content, 'html.parser')
            assets = []
            if include_assets:
                # Handle images
                (site_dir / 'images').mkdir(exist_ok=True)
                for img in soup.find_all('img', src=True):
                    try:
                        src = urljoin(url, img['src'])
                        filename = self._clean_filename(src)
                        save_path = site_dir / 'images' / filename
                        async with self.session.get(src) as resp:
                            if resp.status == 200:
                                with open(save_path, 'wb') as f:
                                    f.write(await resp.read())
                                img['src'] = f'images/{filename}'
                                assets.append(src)
                    except Exception as e:
                        # Log the raw attribute: the resolved `src` may not exist yet
                        logger.warning(f"Failed to download image {img['src']}: {e}")
                # Handle CSS
                (site_dir / 'css').mkdir(exist_ok=True)
                for css in soup.find_all('link', rel='stylesheet', href=True):
                    try:
                        href = urljoin(url, css['href'])
                        filename = self._clean_filename(href)
                        save_path = site_dir / 'css' / filename
                        async with self.session.get(href) as resp:
                            if resp.status == 200:
                                with open(save_path, 'w', encoding='utf-8') as f:
                                    f.write(await resp.text())
                                css['href'] = f'css/{filename}'
                                assets.append(href)
                    except Exception as e:
                        logger.warning(f"Failed to download CSS {css['href']}: {e}")
                # Handle JavaScript
                (site_dir / 'js').mkdir(exist_ok=True)
                for script in soup.find_all('script', src=True):
                    try:
                        src = urljoin(url, script['src'])
                        filename = self._clean_filename(src)
                        save_path = site_dir / 'js' / filename
                        async with self.session.get(src) as resp:
                            if resp.status == 200:
                                with open(save_path, 'w', encoding='utf-8') as f:
                                    f.write(await resp.text())
                                script['src'] = f'js/{filename}'
                                assets.append(src)
                    except Exception as e:
                        logger.warning(f"Failed to download JS {script['src']}: {e}")

            # Save processed HTML
            with open(site_dir / 'index.html', 'w', encoding='utf-8') as f:
                f.write(str(soup))

            return {
                "status": "success",
                "url": url,
                "saved_to": str(site_dir),
                "assets_downloaded": len(assets)
            }
        except Exception as e:
            logger.error(f"Download failed: {e}")
            return {
                "status": "error",
                "url": url,
                "message": str(e)
            }
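
# Illustrative success payload from WebsiteDownloader.download() (values are
# hypothetical):
#   {"status": "success", "url": "https://example.com",
#    "saved_to": "C:\\downloads\\example.com", "assets_downloaded": 12}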


def main():
    parser = argparse.ArgumentParser(description='Website Downloader MCP Server')
    parser.add_argument('--directory', type=str, default='downloads',
                        help='Directory to save downloaded sites')
    args = parser.parse_args()

    save_dir = Path(args.directory).resolve()
    save_dir.mkdir(parents=True, exist_ok=True)
    logger.info(f"Saving downloads to: {save_dir}")

    downloader = WebsiteDownloader(save_dir)
    server = Server("website-downloader")

    @server.list_tools()
    async def handle_list_tools() -> list[types.Tool]:
        return [
            types.Tool(
                name="download-website",
                description="Download a website with its assets",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "Website URL to download"
                        },
                        "include_assets": {
                            "type": "boolean",
                            "description": "Download images, CSS, JS",
                            "default": True
                        }
                    },
                    "required": ["url"]
                }
            )
        ]
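
    # A client invocation of this tool carries JSON arguments shaped like
    # (hypothetical values):
    #   {"url": "https://example.com", "include_assets": true}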
    @server.call_tool()
    async def handle_call_tool(
        name: str,
        arguments: Dict[str, Any] | None
    ) -> list[types.TextContent]:
        try:
            if not arguments:
                raise ValueError("Arguments required")
            if name == "download-website":
                result = await downloader.download(
                    arguments["url"],
                    arguments.get("include_assets", True)
                )
                # Serialize as JSON rather than a Python dict repr so clients
                # can parse the result
                return [types.TextContent(
                    type="text",
                    text=json.dumps(result)
                )]
            else:
                raise ValueError(f"Unknown tool: {name}")
        except Exception as e:
            logger.error(f"Tool error: {e}")
            return [types.TextContent(
                type="text",
                text=f"Error: {str(e)}"
            )]

    async def run_server():
        async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
            try:
                await server.run(
                    read_stream,
                    write_stream,
                    InitializationOptions(
                        server_name="website-downloader",
                        server_version="0.1.0",
                        capabilities=server.get_capabilities(
                            notification_options=NotificationOptions(),
                            experimental_capabilities={},
                        ),
                    ),
                )
            finally:
                # Release the aiohttp session when the server shuts down
                await downloader.close()

    asyncio.run(run_server())


if __name__ == '__main__':
    main()
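
# Usage sketch (assuming this module is saved as server.py; all paths here are
# hypothetical):
#   python server.py --directory downloads
#
# To wire the server into an MCP client such as Claude Desktop, an "mcpServers"
# entry along these lines should work:
#   "website-downloader": {
#     "command": "python",
#     "args": ["C:\\path\\to\\server.py", "--directory", "C:\\downloads"]
#   }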