#!/usr/bin/env python3
"""
Website Scraper MCP MCP Server - FastMCP with D402 Payment Middleware
Uses FastMCP from the official MCP SDK with D402PaymentMiddleware (Starlette) for HTTP 402.
Architecture:
- FastMCP for tool decorators and Context objects
- D402PaymentMiddleware is added to FastMCP's Starlette app for HTTP 402 interception
- Proper HTTP 402 status codes (not JSON-RPC wrapped)
Generated from OpenAPI: None
Environment Variables:
- WEB_SCRAPPING_API_KEY: Server's internal API key (for paid requests)
- SERVER_ADDRESS: Payment address (IATP wallet contract)
- MCP_OPERATOR_PRIVATE_KEY: Operator signing key
- D402_TESTING_MODE: Set to "true" to skip the facilitator (default: false)
"""
import asyncio
import logging
import os
import sys
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union

import requests
import uvicorn
from dotenv import load_dotenv
from retry import retry
# Load variables from a .env file (if any) before the environment is read below.
load_dotenv()

# Configure logging: the level comes from LOG_LEVEL and falls back to INFO.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=os.environ.get("LOG_LEVEL", "INFO").upper(),
)
logger = logging.getLogger('website-scraper-mcp_mcp')
# FastMCP from official SDK
from mcp.server.fastmcp import FastMCP, Context
from starlette.requests import Request
from starlette.responses import JSONResponse
from starlette.middleware.cors import CORSMiddleware
# D402 payment protocol - using Starlette middleware
from traia_iatp.d402.starlette_middleware import D402PaymentMiddleware
from traia_iatp.d402.mcp_middleware import require_payment_for_tool, get_active_api_key
from traia_iatp.d402.payment_introspection import extract_payment_configs_from_mcp
from traia_iatp.d402.types import TokenAmount, TokenAsset, EIP712Domain
# Configuration (read once at import time)
STAGE = os.environ.get("STAGE", "MAINNET").upper()
PORT = int(os.environ.get("PORT", "8000"))

# The payment address is mandatory: importing this module fails without it.
SERVER_ADDRESS = os.environ.get("SERVER_ADDRESS")
if not SERVER_ADDRESS:
    raise ValueError("SERVER_ADDRESS required for payment protocol")

# The internal API key is optional; its absence is only reported.
API_KEY = os.environ.get("WEB_SCRAPPING_API_KEY")
if not API_KEY:
    logger.warning("⚠️ WEB_SCRAPPING_API_KEY not set - payment required for all requests")

# Startup banner
logger.info("=" * 80)
logger.info("Website Scraper MCP MCP Server (FastMCP + D402 Wrapper)")
logger.info("API: https://www.crummy.com/software/BeautifulSoup")
logger.info(f"Payment: {SERVER_ADDRESS}")
logger.info(f"API Key: {'✅' if API_KEY else '❌ Payment required'}")
logger.info("=" * 80)

# Create FastMCP server (listens on all interfaces)
mcp = FastMCP("Website Scraper MCP MCP Server", host="0.0.0.0")
logger.info("✅ FastMCP server created")
# ============================================================================
# TOOL IMPLEMENTATIONS
# ============================================================================
# Tool implementations will be added here by endpoint_implementer_crew
# Each tool will use the @mcp.tool() and @require_payment_for_tool() decorators
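# D402 Payment Middleware
# The HTTP 402 payment middleware (D402PaymentMiddleware, imported from
# traia_iatp.d402.starlette_middleware) is attached in create_app_with_middleware().
# This file configures it from these environment variables:
# - SERVER_ADDRESS: Where to receive payments (required)
# - WEB_SCRAPPING_API_KEY: Server's internal API key for paid requests
# - FACILITATOR_URL or D402_FACILITATOR_URL: Payment facilitator endpoint
# - D402_FACILITATOR_API_KEY: Forwarded to the middleware as facilitator_api_key
# - D402_TESTING_MODE: "true" bypasses the facilitator (default: false)
# Per-tool prices are declared in each @require_payment_for_tool decorator below.
# NOTE(review): this comment previously listed PAYMENT_ADDRESS / EVM_ADDRESS,
# EVM_NETWORK, DEFAULT_PRICE_USD and WEBSITE_SCRAPER_MCP_API_KEY. None of them
# is read in this file - confirm whether traia_iatp.d402 reads them itself.
#
# All payment verification logic is handled by the traia_iatp.d402 module.
# No custom implementation needed!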
# API Endpoint Tool Implementations
@mcp.tool()
@require_payment_for_tool(
    price=TokenAmount(
        # NOTE(review): assuming `amount` is expressed in base units, with
        # decimals=6 below this equals 10,000,000 whole tokens; the
        # "1e-05 tokens" figure only holds for an 18-decimal asset.
        # Confirm which of amount/decimals is intended.
        amount="10000000000000",  # 1e-05 tokens
        asset=TokenAsset(
            address="0x3e17730bb2ca51a8D5deD7E44c003A2e95a4d822",
            decimals=6,
            network="sepolia",
            eip712=EIP712Domain(
                name="IATPWallet",
                version="1"
            )
        )
    ),
    # NOTE(review): this text is cut off at exactly 50 characters - confirm
    # whether the truncation is intentional.
    description="Scrape text content from a website URL. Extracts c"
)
async def scrape_website(
    context: Context,
    website_url: str,
    timeout: int = 15,
    include_metadata: bool = True
) -> Any:
    """
    Scrape text content from a website URL. Extracts clean text content from any website URL by removing HTML tags and formatting. Useful for reading articles, documentation, or any web content.

    Generated from OpenAPI endpoint: POST /scraper/website

    Args:
        context: MCP context (auto-injected by framework, not user-provided)
        website_url: The URL of the website to scrape (must include http:// or https://) (required) Examples: "https://example.com", "https://news.ycombinator.com", "https://en.wikipedia.org/wiki/Artificial_intelligence"
        timeout: Scrape timeout in seconds, forwarded to the upstream API in the request body (the endpoint description states a maximum of 60; it is not enforced here) (optional, default: 15)
        include_metadata: Whether to include page metadata in response (optional, default: True)

    Returns:
        The decoded JSON response of the upstream API (dict, list, or other
        JSON type). On any failure a dict with the keys "error" and
        "endpoint" is returned instead of raising.

    Example Usage:
        await scrape_website(website_url="https://example.com")

    Note: 'context' parameter is auto-injected by MCP framework
    """
    # Payment already verified by @require_payment_for_tool decorator
    # Get API key using helper (handles request.state fallback)
    api_key = get_active_api_key(context)
    try:
        url = "https://www.crummy.com/software/BeautifulSoup/scraper/website"
        headers = {}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        # Arguments explicitly passed as None are left out of the JSON body.
        payload = {
            key: value
            for key, value in {
                "website_url": website_url,
                "timeout": timeout,
                "include_metadata": include_metadata,
            }.items()
            if value is not None
        }
        # requests is a blocking client: run it in a worker thread so this
        # coroutine does not stall the event loop (and every other request
        # served by it) while waiting for the upstream response.
        # The HTTP call itself is capped at 30 seconds, independently of the
        # `timeout` value sent in the body.
        response = await asyncio.to_thread(
            requests.post,
            url,
            json=payload,
            headers=headers,
            timeout=30,
        )
        response.raise_for_status()
        return response.json()
    except Exception as e:
        # Tool boundary: report the failure to the caller as data rather than
        # letting the exception propagate into the MCP framework.
        logger.error(f"Error in scrape_website: {e}")
        return {"error": str(e), "endpoint": "/scraper/website"}
@mcp.tool()
@require_payment_for_tool(
    price=TokenAmount(
        # NOTE(review): assuming `amount` is expressed in base units, with
        # decimals=6 below this equals 10,000,000 whole tokens; the
        # "1e-05 tokens" figure only holds for an 18-decimal asset.
        # Confirm which of amount/decimals is intended.
        amount="10000000000000",  # 1e-05 tokens
        asset=TokenAsset(
            address="0x3e17730bb2ca51a8D5deD7E44c003A2e95a4d822",
            decimals=6,
            network="sepolia",
            eip712=EIP712Domain(
                name="IATPWallet",
                version="1"
            )
        )
    ),
    # NOTE(review): this text is cut off at exactly 50 characters - confirm
    # whether the truncation is intentional.
    description="Scrape website content with custom headers for sit"
)
async def scrape_website_custom_headers(
    context: Context,
    website_url: str,
    user_agent: str = "",
    referer: str = "",
    timeout: int = 15
) -> Any:
    """
    Scrape website content with custom headers for sites that require specific user agents. Some websites block requests or return different content based on headers. This tool allows customizing the User-Agent and Referer headers.

    Generated from OpenAPI endpoint: POST /scraper/website/custom-headers

    Args:
        context: MCP context (auto-injected by framework, not user-provided)
        website_url: The URL of the website to scrape (must include http:// or https://) (required) Examples: "https://example.com", "https://www.reddit.com", "https://medium.com"
        user_agent: Custom User-Agent string; an empty string is still sent to the upstream API, which per the endpoint description then uses its default (optional, default: "") Examples: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        referer: Custom Referer header (optional, default: "") Examples: "https://www.google.com/", "https://twitter.com/"
        timeout: Scrape timeout in seconds, forwarded to the upstream API in the request body (the endpoint description states a maximum of 60; it is not enforced here) (optional, default: 15)

    Returns:
        The decoded JSON response of the upstream API (dict, list, or other
        JSON type). On any failure a dict with the keys "error" and
        "endpoint" is returned instead of raising.

    Example Usage:
        await scrape_website_custom_headers(website_url="https://example.com")

    Note: 'context' parameter is auto-injected by MCP framework
    """
    # Payment already verified by @require_payment_for_tool decorator
    # Get API key using helper (handles request.state fallback)
    api_key = get_active_api_key(context)
    try:
        url = "https://www.crummy.com/software/BeautifulSoup/scraper/website/custom-headers"
        headers = {}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        # user_agent and referer travel in the JSON body, not as headers of
        # this outgoing request. Arguments explicitly passed as None are
        # left out of the body.
        payload = {
            key: value
            for key, value in {
                "website_url": website_url,
                "user_agent": user_agent,
                "referer": referer,
                "timeout": timeout,
            }.items()
            if value is not None
        }
        # requests is a blocking client: run it in a worker thread so this
        # coroutine does not stall the event loop (and every other request
        # served by it) while waiting for the upstream response.
        # The HTTP call itself is capped at 30 seconds, independently of the
        # `timeout` value sent in the body.
        response = await asyncio.to_thread(
            requests.post,
            url,
            json=payload,
            headers=headers,
            timeout=30,
        )
        response.raise_for_status()
        return response.json()
    except Exception as e:
        # Tool boundary: report the failure to the caller as data rather than
        # letting the exception propagate into the MCP framework.
        logger.error(f"Error in scrape_website_custom_headers: {e}")
        return {"error": str(e), "endpoint": "/scraper/website/custom-headers"}
@mcp.tool()
@require_payment_for_tool(
    price=TokenAmount(
        # NOTE(review): assuming `amount` is expressed in base units, with
        # decimals=6 below this equals 10,000,000 whole tokens; the
        # "1e-05 tokens" figure only holds for an 18-decimal asset.
        # Confirm which of amount/decimals is intended.
        amount="10000000000000",  # 1e-05 tokens
        asset=TokenAsset(
            address="0x3e17730bb2ca51a8D5deD7E44c003A2e95a4d822",
            decimals=6,
            network="sepolia",
            eip712=EIP712Domain(
                name="IATPWallet",
                version="1"
            )
        )
    ),
    # NOTE(review): this text is cut off at exactly 50 characters - confirm
    # whether the truncation is intentional.
    description="Get information about the Website Scraper service "
)
async def get_scraper_info(
    context: Context
) -> Any:
    """
    Get information about the Website Scraper service including supported formats, features, and limitations.

    Generated from OpenAPI endpoint: GET /scraper/info

    Args:
        context: MCP context (auto-injected by framework, not user-provided)

    Returns:
        The decoded JSON response of the upstream API (dict, list, or other
        JSON type). On any failure a dict with the keys "error" and
        "endpoint" is returned instead of raising.

    Example Usage:
        await get_scraper_info()

    Note: 'context' parameter is auto-injected by MCP framework
    """
    # Payment already verified by @require_payment_for_tool decorator
    # Get API key using helper (handles request.state fallback)
    api_key = get_active_api_key(context)
    try:
        url = "https://www.crummy.com/software/BeautifulSoup/scraper/info"
        headers = {}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        # requests is a blocking client: run it in a worker thread so this
        # coroutine does not stall the event loop (and every other request
        # served by it) while waiting for the upstream response.
        response = await asyncio.to_thread(
            requests.get,
            url,
            headers=headers,
            timeout=30,
        )
        response.raise_for_status()
        return response.json()
    except Exception as e:
        # Tool boundary: report the failure to the caller as data rather than
        # letting the exception propagate into the MCP framework.
        logger.error(f"Error in get_scraper_info: {e}")
        return {"error": str(e), "endpoint": "/scraper/info"}
# TODO: Add your API-specific functions here
# ============================================================================
# APPLICATION SETUP WITH STARLETTE MIDDLEWARE
# ============================================================================
def create_app_with_middleware():
    """
    Build the ASGI app: FastMCP's Starlette app plus D402 payment and CORS middleware.

    Strategy:
    1. Get FastMCP's Starlette app via streamable_http_app()
    2. Extract payment configs from @require_payment_for_tool decorators
       (single source of truth - no duplication!)
    3. Read, log and validate the D402 settings from the environment
    4. Add D402PaymentMiddleware, then CORSMiddleware (CORS ends up outermost)
    5. Register the /health route

    Environment variables read here: FACILITATOR_URL / D402_FACILITATOR_URL,
    MCP_OPERATOR_PRIVATE_KEY, NETWORK, D402_TESTING_MODE and
    D402_FACILITATOR_API_KEY.

    Returns:
        The app returned by mcp.streamable_http_app(), with both middleware
        layers and the /health route attached.

    Raises:
        ValueError: if no facilitator URL is configured and
            D402_TESTING_MODE is not "true".
    """
    logger.info("🔧 Creating FastMCP app with middleware...")
    # Get FastMCP's Starlette app
    app = mcp.streamable_http_app()
    logger.info("✅ Got FastMCP Starlette app")

    # Extract payment configs from decorators (single source of truth!)
    tool_payment_configs = extract_payment_configs_from_mcp(mcp, SERVER_ADDRESS)
    logger.info(f"📊 Extracted {len(tool_payment_configs)} payment configs from @require_payment_for_tool decorators")

    # D402 Configuration
    facilitator_url = os.getenv("FACILITATOR_URL") or os.getenv("D402_FACILITATOR_URL")
    # operator_key and network are only reported in the log below; they are
    # not passed to the middleware in this function.
    operator_key = os.getenv("MCP_OPERATOR_PRIVATE_KEY")
    network = os.getenv("NETWORK", "sepolia")
    testing_mode = os.getenv("D402_TESTING_MODE", "false").lower() == "true"

    # Log D402 configuration with prominent facilitator info
    logger.info("="*60)
    logger.info("D402 Payment Protocol Configuration:")
    logger.info(f" Server Address: {SERVER_ADDRESS}")
    logger.info(f" Network: {network}")
    logger.info(f" Operator Key: {'✅ Set' if operator_key else '❌ Not set'}")
    logger.info(f" Testing Mode: {'⚠️ ENABLED (bypasses facilitator)' if testing_mode else '✅ DISABLED (uses facilitator)'}")
    logger.info("="*60)

    # Without testing mode a facilitator is mandatory.
    if not facilitator_url and not testing_mode:
        logger.error("❌ FACILITATOR_URL required when testing_mode is disabled!")
        raise ValueError("Set FACILITATOR_URL or enable D402_TESTING_MODE=true")

    if facilitator_url:
        logger.info(f"🌐 FACILITATOR: {facilitator_url}")
        local_markers = ("localhost", "127.0.0.1", "host.docker.internal")
        if any(marker in facilitator_url for marker in local_markers):
            logger.info(" 📍 Using LOCAL facilitator for development")
        else:
            logger.info(" 🌍 Using REMOTE facilitator for production")
    else:
        logger.warning("⚠️ D402 Testing Mode - Facilitator bypassed")
    logger.info("="*60)

    # Middleware order: Starlette puts each add_middleware() call at the
    # front of the stack, so the middleware added LAST is the outermost one
    # and handles requests first. CORS must be outermost so that preflight
    # requests are answered before the payment check and so that 402
    # responses also carry CORS headers. Hence D402 is added first.

    # Add D402 payment middleware with extracted configs
    app.add_middleware(
        D402PaymentMiddleware,
        tool_payment_configs=tool_payment_configs,
        server_address=SERVER_ADDRESS,
        requires_auth=True,  # Extracts API keys + checks payment
        internal_api_key=API_KEY,  # Server's internal key (for Mode 2: paid access)
        testing_mode=testing_mode,
        facilitator_url=facilitator_url,
        facilitator_api_key=os.getenv("D402_FACILITATOR_API_KEY"),
        server_name="website-scraper-mcp-mcp-server"  # MCP server ID for tracking
    )
    logger.info("✅ Added D402PaymentMiddleware")
    logger.info(" - Auth extraction: Enabled")
    logger.info(" - Dual mode: API key OR payment")

    # Add CORS middleware last so it wraps (and runs before) the D402 layer
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],  # Allow all origins
        allow_credentials=True,
        allow_methods=["*"],  # Allow all methods
        allow_headers=["*"],  # Allow all headers
        expose_headers=["mcp-session-id"],  # Expose custom headers to browser
    )
    logger.info("✅ Added CORS middleware (allow all origins, expose mcp-session-id)")

    async def health_check(request: Request) -> JSONResponse:
        """Health check endpoint for container orchestration."""
        return JSONResponse(
            content={
                "status": "healthy",
                "service": "website-scraper-mcp-mcp-server",
                "timestamp": datetime.now().isoformat()
            }
        )

    # Registered with add_route() because the @app.route decorator is
    # deprecated in Starlette.
    # NOTE(review): Starlette middleware wraps every route, so /health still
    # passes through the CORS and D402 layers; whether D402PaymentMiddleware
    # exempts this path is not visible here - confirm.
    app.add_route("/health", health_check, methods=["GET"])
    logger.info("✅ Added /health endpoint")
    return app
if __name__ == "__main__":
    # Startup banner: every entry is logged as its own INFO record, in order.
    rule = "=" * 80
    for banner_line in (
        rule,
        "Starting Website Scraper MCP MCP Server",
        rule,
        "Architecture:",
        " 1. D402PaymentMiddleware intercepts requests",
        " - Extracts API keys from Authorization header",
        " - Checks payment → HTTP 402 if no API key AND no payment",
        " 2. FastMCP processes valid requests with tool decorators",
        rule,
    ):
        logger.info(banner_line)

    # Build the ASGI app (FastMCP + middleware) and serve it with uvicorn on
    # all interfaces; uvicorn's log level follows LOG_LEVEL (default "info").
    app = create_app_with_middleware()
    uvicorn_log_level = os.getenv("LOG_LEVEL", "info").lower()
    uvicorn.run(app, host="0.0.0.0", port=PORT, log_level=uvicorn_log_level)