ScrapeGraph MCP Server
Official
- src
- scrapegraph_mcp
#!/usr/bin/env python3
"""
MCP server for ScrapeGraph API integration.
This server exposes methods to use ScrapeGraph's AI-powered web scraping services:
- markdownify: Convert any webpage into clean, formatted markdown
- smartscraper: Extract structured data from any webpage using AI
- searchscraper: Perform AI-powered web searches with structured results
"""
import os
from typing import Any, Dict
import httpx
from mcp.server.fastmcp import FastMCP
class ScapeGraphClient:
    """Synchronous client for the ScrapeGraph API.

    Every public endpoint method sends a JSON ``POST`` to a path under
    ``BASE_URL`` using the headers built in ``__init__`` and returns the
    decoded JSON body.  The instance can be used as a context manager so the
    underlying HTTP client is closed even when an exception is raised::

        with ScapeGraphClient(api_key) as client:
            client.markdownify("https://example.com")
    """

    BASE_URL = "https://api.scrapegraphai.com/v1"

    def __init__(self, api_key: str):
        """
        Initialize the ScrapeGraph API client.

        Args:
            api_key: API key for the ScrapeGraph API; sent on every request
                in the ``SGAI-APIKEY`` header.
        """
        self.api_key = api_key
        self.headers = {
            "SGAI-APIKEY": api_key,
            "Content-Type": "application/json",
        }
        self.client = httpx.Client(timeout=60.0)

    def __enter__(self) -> "ScapeGraphClient":
        """Return the client itself for use in a ``with`` statement."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Close the HTTP client on leaving the ``with`` block."""
        self.close()

    def _post(self, endpoint: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        POST ``payload`` as JSON to ``BASE_URL/<endpoint>``.

        Shared by all endpoint methods so request construction and error
        handling live in one place.

        Args:
            endpoint: Path segment appended to ``BASE_URL``
            payload: JSON-serializable request body

        Returns:
            The decoded JSON response body

        Raises:
            Exception: If the response status code is not 200; the message
                has the form ``"Error <status>: <response text>"``.
        """
        response = self.client.post(
            f"{self.BASE_URL}/{endpoint}",
            headers=self.headers,
            json=payload,
        )
        if response.status_code != 200:
            # Kept as a plain Exception with this exact message format:
            # existing callers catch Exception and surface str(e).
            raise Exception(f"Error {response.status_code}: {response.text}")
        return response.json()

    def markdownify(self, website_url: str) -> Dict[str, Any]:
        """
        Convert a webpage into clean, formatted markdown.

        Args:
            website_url: URL of the webpage to convert

        Returns:
            Dictionary containing the markdown result

        Raises:
            Exception: If the API responds with a non-200 status code
        """
        return self._post("markdownify", {"website_url": website_url})

    def smartscraper(self, user_prompt: str, website_url: str) -> Dict[str, Any]:
        """
        Extract structured data from a webpage using AI.

        Args:
            user_prompt: Instructions for what data to extract
            website_url: URL of the webpage to scrape

        Returns:
            Dictionary containing the extracted data

        Raises:
            Exception: If the API responds with a non-200 status code
        """
        return self._post(
            "smartscraper",
            {"user_prompt": user_prompt, "website_url": website_url},
        )

    def searchscraper(self, user_prompt: str) -> Dict[str, Any]:
        """
        Perform AI-powered web searches with structured results.

        Args:
            user_prompt: Search query or instructions

        Returns:
            Dictionary containing search results and reference URLs

        Raises:
            Exception: If the API responds with a non-200 status code
        """
        return self._post("searchscraper", {"user_prompt": user_prompt})

    def close(self) -> None:
        """Close the HTTP client."""
        self.client.close()
# MCP server instance; the tool functions in this module register on it.
mcp = FastMCP("ScapeGraph API MCP Server")

# Shared API client, built from the SGAI_API_KEY environment variable.
# When the variable is unset or empty no client is created and
# `scrapegraph_client` stays None.
default_api_key = os.environ.get("SGAI_API_KEY")
if default_api_key:
    scrapegraph_client = ScapeGraphClient(default_api_key)
else:
    scrapegraph_client = None
# Tool: markdownify
@mcp.tool()
def markdownify(website_url: str) -> Dict[str, Any]:
    """
    Convert a webpage into clean, formatted markdown.

    Args:
        website_url: URL of the webpage to convert

    Returns:
        Dictionary containing the markdown result, or a dictionary with a
        single "error" key if the client is missing or the request fails
    """
    client = scrapegraph_client
    if client is None:
        return {"error": "ScapeGraph client not initialized. Please provide an API key."}

    try:
        result = client.markdownify(website_url)
    except Exception as exc:
        # Failures are reported in-band as an error dictionary, not raised.
        return {"error": str(exc)}
    return result
# Tool: smartscraper
@mcp.tool()
def smartscraper(user_prompt: str, website_url: str) -> Dict[str, Any]:
    """
    Extract structured data from a webpage using AI.

    Args:
        user_prompt: Instructions for what data to extract
        website_url: URL of the webpage to scrape

    Returns:
        Dictionary containing the extracted data, or a dictionary with a
        single "error" key if the client is missing or the request fails
    """
    client = scrapegraph_client
    if client is None:
        return {"error": "ScapeGraph client not initialized. Please provide an API key."}

    try:
        result = client.smartscraper(user_prompt, website_url)
    except Exception as exc:
        # Failures are reported in-band as an error dictionary, not raised.
        return {"error": str(exc)}
    return result
# Tool: searchscraper
@mcp.tool()
def searchscraper(user_prompt: str) -> Dict[str, Any]:
    """
    Perform AI-powered web searches with structured results.

    Args:
        user_prompt: Search query or instructions

    Returns:
        Dictionary containing search results and reference URLs, or a
        dictionary with a single "error" key if the client is missing or
        the request fails
    """
    client = scrapegraph_client
    if client is None:
        return {"error": "ScapeGraph client not initialized. Please provide an API key."}

    try:
        result = client.searchscraper(user_prompt)
    except Exception as exc:
        # Failures are reported in-band as an error dictionary, not raised.
        return {"error": str(exc)}
    return result
def main() -> None:
    """Run the ScapeGraph MCP server over the stdio transport.

    The startup banner is written to stderr rather than stdout: with
    ``transport="stdio"`` the process's stdout carries the MCP protocol
    messages, so any other text printed there would corrupt the stream the
    client is reading.
    """
    # Function-scope import so this block does not depend on a file-level
    # `import sys`.
    import sys

    print("Starting ScapeGraph MCP server!", file=sys.stderr)
    # Run the server
    mcp.run(transport="stdio")


if __name__ == "__main__":
    main()