Scrapezy
Official
by Scrapezy
- src
#!/usr/bin/env node
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
// Base URL of the Scrapezy HTTP API; endpoint paths (e.g. "extract") are appended to it.
// NOTE(review): the value ends with "/", so appending a path that itself starts
// with "/" produces a double slash — join paths with care.
const SCRAPEZY_API = "https://scrapezy.com/api/";
/**
 * Collects `--key=value` options from the process command line.
 *
 * Only arguments of the form `--key=value` with a non-empty key and a
 * non-empty value are kept; positional arguments, bare `--flag` switches and
 * `--key=` (empty value) are ignored. The value is everything after the FIRST
 * `=`, so values that themselves contain `=` (for example base64-padded API
 * keys) are preserved intact instead of being truncated.
 *
 * @returns A map from option name (without the leading `--`) to its value.
 */
function parseArgs(): Record<string, string> {
  const options: Record<string, string> = {};
  for (const arg of process.argv.slice(2)) {
    if (!arg.startsWith("--")) {
      continue;
    }
    // Split on the first "=" only; later "=" characters belong to the value.
    const separator = arg.indexOf("=");
    if (separator === -1) {
      continue;
    }
    const key = arg.slice(2, separator);
    const value = arg.slice(separator + 1);
    if (key && value) {
      options[key] = value;
    }
  }
  return options;
}
/**
 * Resolves the Scrapezy API key.
 *
 * The `--api-key=...` command line option takes precedence; when it is absent
 * the `SCRAPEZY_API_KEY` environment variable is used. If neither yields a
 * non-empty value, an error is written to stderr and the process exits with
 * status 1.
 *
 * @returns The resolved, non-empty API key.
 */
function getScrapezyApiKey(): string {
  const { "api-key": cliKey } = parseArgs();
  const resolvedKey = cliKey ? cliKey : process.env.SCRAPEZY_API_KEY;
  if (resolvedKey) {
    return resolvedKey;
  }
  console.error("Error: SCRAPEZY_API_KEY is required. Either set it as an environment variable or pass it using --api-key=YOUR_KEY");
  process.exit(1);
}
// The MCP server instance, identified by the name and version given here;
// tools are registered on it further down in this file.
const server = new McpServer({ name: "scrapezy-mcp-server", version: "0.1.1" });
/**
 * Runs a Scrapezy extraction: submits a job for `url` with `prompt`, then
 * polls the job until it leaves the "pending" state or the attempt budget
 * (30 polls, 2 seconds apart) is exhausted.
 *
 * Failures are reported as a returned `{ error: string }` object rather than
 * thrown: network errors, non-2xx HTTP responses and unparsable response
 * bodies surface the same way as errors reported by the API itself.
 *
 * @param url - Page to extract data from.
 * @param prompt - Instruction sent to the API describing what to extract.
 * @returns The job's `result` payload when one is present, otherwise an
 *   `{ error }` object describing what went wrong.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- the result payload shape is defined by the remote API
async function callScrapezyApi(url: string, prompt: string): Promise<any> {
  const apiKey = getScrapezyApiKey();

  // Strip trailing slashes from the base so the joined path never contains "//".
  const extractEndpoint = `${SCRAPEZY_API.replace(/\/+$/, "")}/extract`;

  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped JSON from the remote API
  type JsonOutcome = { ok: true; data: any } | { ok: false; error: string };

  // Performs one authenticated request and folds every failure mode
  // (network error, non-2xx status, non-JSON body) into an `ok: false` outcome.
  const requestJson = async (
    endpoint: string,
    method: "GET" | "POST",
    body?: string
  ): Promise<JsonOutcome> => {
    try {
      const response = await fetch(endpoint, {
        method,
        headers: {
          "Content-Type": "application/json",
          "x-api-key": apiKey,
        },
        body,
      });

      // Parse leniently: an error response may or may not carry a JSON body.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      let data: any;
      try {
        data = await response.json();
      } catch {
        data = undefined;
      }

      if (!response.ok) {
        const detail = typeof data?.error === "string" ? `: ${data.error}` : "";
        return { ok: false, error: `HTTP ${response.status}${detail}` };
      }
      return { ok: true, data };
    } catch (err: unknown) {
      return { ok: false, error: err instanceof Error ? err.message : String(err) };
    }
  };

  // Step 1: Submit the extraction job
  const submitted = await requestJson(extractEndpoint, "POST", JSON.stringify({ url, prompt }));
  if (!submitted.ok) {
    return { error: `Failed to submit extraction job: ${submitted.error}` };
  }
  const jobId = submitted.data?.jobId;
  if (!jobId) {
    return { error: "Failed to submit extraction job" };
  }

  // Step 2: Poll for results
  const maxAttempts = 30; // Maximum number of polling attempts
  const pollingInterval = 2000; // 2 seconds between polling attempts
  // Encode the id so unexpected characters cannot alter the request path.
  const jobEndpoint = `${extractEndpoint}/${encodeURIComponent(String(jobId))}`;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    // Wait before every poll, including the first one.
    await new Promise((resolve) => setTimeout(resolve, pollingInterval));

    const polled = await requestJson(jobEndpoint, "GET");
    if (!polled.ok) {
      return { error: `Failed to poll extraction job: ${polled.error}` };
    }

    // Any status other than "pending" is terminal (completed or failed).
    const pollData = polled.data;
    if (pollData?.status !== "pending") {
      return pollData?.result || { error: pollData?.error || "Unknown error" };
    }
  }

  return { error: "Extraction job timed out" };
}
// Registers the "extract-structured-data" tool. It forwards the validated
// `url` and `prompt` to callScrapezyApi and returns the outcome as a single
// text content item: pretty-printed JSON on success, or a failure message
// (flagged with `isError`) when the API call reports an error.
server.tool(
  "extract-structured-data",
  "Extract structured data from a website.",
  {
    url: z.string().url().describe("URL of the website to extract data from"),
    prompt: z.string().describe("Prompt to extract data from the website"),
  },
  async ({ url, prompt }) => {
    const result = await callScrapezyApi(url, prompt);

    // The `in` operator throws a TypeError on primitives, and an extraction
    // result may legitimately be a plain string or number, so check that the
    // value is a non-null object before probing for an "error" property.
    const failed = typeof result === "object" && result !== null && "error" in result;
    if (failed) {
      return {
        // Mark the result as a tool-level error so clients can tell it apart
        // from a successful extraction.
        isError: true,
        content: [
          {
            type: "text",
            text: `Failed to extract data from ${url}: ${result.error}`,
          },
        ],
      };
    }

    return {
      content: [
        {
          type: "text",
          text: JSON.stringify(result, null, 2),
        },
      ],
    };
  }
);
/**
 * Entry point: connects the MCP server to a freshly created stdio transport.
 */
async function main() {
  await server.connect(new StdioServerTransport());
}

// Start the server; if startup rejects, log the failure and exit with status 1.
main().catch((startupError) => {
  console.error("Server error:", startupError);
  process.exit(1);
});