MCP Server
by la-rebelion
#!/usr/bin/env node
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
CallToolRequestSchema,
ListToolsRequestSchema,
CallToolResult,
Tool,
} from "@modelcontextprotocol/sdk/types.js";
import puppeteer, { Browser, Page } from "puppeteer";
import {
getEmbeddingSentTransformer,
initializeModelSentTransformer,
makeRequest,
semanticSearchRequestsSentTransformer,
} from "./utilities.js";
import { RequestRecord } from "./types.js";
import { FeatureExtractionPipeline } from "@xenova/transformers";
/**
 * Builds the object-schema envelope that every tool's `inputSchema` shares:
 * `type: "object"` plus the given properties and list of required keys.
 */
function objectSchema(
  properties: Record<string, unknown>,
  required: string[]
) {
  return { type: "object" as const, properties, required };
}

// Catalogue of the tools this server advertises; declared once so the
// definitions are not repeated wherever the list is needed.
const TOOLS: Tool[] = [
  {
    name: "puppeteer_navigate",
    description: "Navigate to a URL",
    inputSchema: objectSchema({ url: { type: "string" } }, ["url"]),
  },
  {
    name: "puppeteer_page_history",
    description: "Get the history of visited URLs, most recent urls first",
    inputSchema: objectSchema({}, []),
  },
  {
    name: "make_http_request",
    description: "Make an HTTP request with curl",
    inputSchema: objectSchema(
      {
        type: {
          type: "string",
          description: "Type of the request. GET, POST, PUT, DELETE",
        },
        url: {
          type: "string",
          description: "Url to make the request to",
        },
        headers: {
          type: "object",
          description: "Headers to include in the request",
        },
        body: {
          type: "object",
          description: "Body to include in the request",
        },
      },
      ["type", "url", "headers", "body"]
    ),
  },
  {
    name: "semantic_search_requests",
    description:
      "Semantically search for requests that occurred within a page URL. Returns the top 10 results.",
    inputSchema: objectSchema(
      {
        query: {
          type: "string",
          description:
            "Your search request. Make this specific and detailed to get the best results",
        },
        page_url: {
          type: "string",
          description: "The page within which to search for requests",
        },
      },
      ["query", "page_url"]
    ),
  },
];
// Global state shared by the browser event handlers and the tool handlers.
let browser: Browser | undefined;
let page: Page | undefined;
// Formatted console messages emitted by the page, in arrival order.
const consoleLogs: string[] = [];
// Intercepted requests, keyed by the URL of the page that was open when the
// request was made; newer records are inserted at the front of each list.
const requests: Map<string, RequestRecord[]> = new Map(); // collects all results
const urlHistory: Array<string> = [];
// Embedding pipeline; stays undefined until the model has finished loading.
let pipeline: FeatureExtractionPipeline | undefined;
initializeModelSentTransformer()
  .then((sent_pipeline) => {
    console.error("model loaded");
    console.error("model", sent_pipeline);
    pipeline = sent_pipeline;
  })
  .catch((error: unknown) => {
    // A failed model load must not surface as an unhandled rejection.
    // `pipeline` simply stays undefined, which the code that reads it
    // already checks for.
    console.error("Failed to load embedding model", error);
  });
/**
 * Lazily launches the shared Puppeteer browser and returns the page used for
 * all tool calls.
 *
 * On first use this launches the browser (headless with sandbox-related flags
 * when `DOCKER_CONTAINER` is set, headed otherwise), enables request
 * interception on the first page, and installs two listeners:
 *  - `console`: appends the formatted message to `consoleLogs` and sends a
 *    `notifications/resources/updated` notification for `console://logs`;
 *  - `request`: embeds each intercepted request and stores it in `requests`
 *    under the URL of the page at the time the request was seen.
 *
 * @returns The shared page.
 * @throws If the browser cannot be launched or no page is available.
 */
async function ensureBrowser(): Promise<Page> {
  if (!browser) {
    const npx_args = { headless: false };
    const docker_args = {
      headless: true,
      args: ["--no-sandbox", "--single-process", "--no-zygote"],
    };
    browser = await puppeteer.launch(
      process.env.DOCKER_CONTAINER ? docker_args : npx_args
    );
    const pages = await browser.pages();
    // Fall back to a fresh tab if the browser reports no open pages.
    const activePage = pages[0] ?? (await browser.newPage());
    page = activePage;
    // Await so interception is active before any navigation is issued and so
    // a failure surfaces here instead of as an unhandled rejection.
    await activePage.setRequestInterception(true);

    activePage.on("console", (msg) => {
      const logEntry = `[${msg.type()}] ${msg.text()}`;
      consoleLogs.push(logEntry);
      server
        .notification({
          method: "notifications/resources/updated",
          params: { uri: "console://logs" },
        })
        .catch((error: unknown) => {
          console.error("Failed to send console log notification", error);
        });
    });

    activePage.on("request", async (request) => {
      try {
        const model = pipeline;
        if (!model) {
          console.error(
            "Request made before model was loaded.",
            request.url(),
            activePage.url()
          );
          return;
        }
        // Capture the key before awaiting: the page may navigate meanwhile.
        const pageUrl = activePage.url();
        const embedding = await getEmbeddingSentTransformer(
          request.url() +
            request.method() +
            JSON.stringify(request.headers()) +
            JSON.stringify(request.postData()),
          model
        );
        const record: RequestRecord = {
          url: request.url(),
          resourceType: request.resourceType(),
          method: request.method(),
          headers: request.headers(),
          postData: request.postData(),
          embedding,
        };
        // Look the list up only AFTER the await. Checking before it lets
        // concurrent requests for a new page each create their own list and
        // overwrite one another's records.
        const recorded = requests.get(pageUrl);
        if (recorded) {
          recorded.unshift(record);
        } else {
          requests.set(pageUrl, [record]);
        }
      } catch (error: unknown) {
        console.error("Failed to record request", request.url(), error);
      } finally {
        // Always release the intercepted request, even when recording failed,
        // otherwise the page would wait on it indefinitely.
        await request.continue().catch((error: unknown) => {
          console.error("Failed to continue request", request.url(), error);
        });
      }
    });
  }
  if (!page) {
    throw new Error("Browser page is not available");
  }
  return page;
}
/** Shape of the `mcpHelper` member this module declares on `Window`. */
type McpHelperState = {
  logs: string[];
  originalConsole: Partial<typeof console>;
};

// Ambient augmentation: adds a typed `mcpHelper` member to the global
// `Window` interface.
declare global {
  interface Window {
    mcpHelper: McpHelperState;
  }
}
/** Wraps plain text in the single-item `CallToolResult` shape used by every tool response. */
function textResult(text: string, isError = false): CallToolResult {
  return { content: [{ type: "text", text }], isError };
}

/**
 * Dispatches a tool call by name and returns its result.
 *
 * Supported tools:
 *  - `puppeteer_navigate`: opens `args.url` in the shared page and records the
 *    resulting page URL in `urlHistory`.
 *  - `puppeteer_page_history` (also accepted under the older `page_history`
 *    name): returns the recorded URLs, most recent first, one per line.
 *  - `make_http_request`: forwards `url`, `type`, `headers` and `body` to
 *    `makeRequest` and returns the response as pretty-printed JSON.
 *  - `semantic_search_requests`: searches the requests recorded for
 *    `args.page_url` and returns the matches without their `embedding` and
 *    `similarity` fields.
 *
 * The browser is only launched for `puppeteer_navigate`; the other tools do
 * not touch the page. Any error thrown while running a tool is reported as a
 * result with `isError: true` rather than propagated to the caller.
 *
 * @param name Tool name as sent by the client.
 * @param args Tool arguments (unvalidated client input).
 * @returns The tool result; `isError` is true for failures and unknown tools.
 */
async function handleToolCall(
  name: string,
  args: any
): Promise<CallToolResult> {
  try {
    switch (name) {
      case "puppeteer_navigate": {
        const page = await ensureBrowser();
        await page.goto(args.url);
        // Record the page's URL after navigation so the history tool has data
        // to return; this is the same value `page.url()` reports afterwards.
        urlHistory.push(page.url());
        return textResult(`Navigated to ${args.url}`);
      }
      // The tool is advertised as "puppeteer_page_history"; "page_history" is
      // kept as an alias for callers that used the earlier handler name.
      case "puppeteer_page_history":
      case "page_history":
        // Reverse a copy: Array.prototype.reverse mutates in place, which
        // would flip the stored order on every call.
        return textResult([...urlHistory].reverse().join("\n"));
      case "make_http_request": {
        const response = await makeRequest(
          args.url,
          args.type,
          args.headers,
          args.body
        );
        return textResult(JSON.stringify(response, null, 2));
      }
      case "semantic_search_requests": {
        if (!pipeline) {
          return textResult("Model not defined", true);
        }
        const recorded = requests.get(args.page_url);
        if (!recorded || recorded.length === 0) {
          return textResult(
            `No requests recorded for page URL: ${args.page_url}`,
            true
          );
        }
        const searchResults = await semanticSearchRequestsSentTransformer(
          args.query,
          recorded,
          pipeline
        );
        // Strip the vector and score before returning the matches.
        const withoutEmbedding = searchResults.map(
          ({ embedding, similarity, ...rest }) => rest
        );
        return textResult(JSON.stringify(withoutEmbedding, null, 2));
      }
      default:
        return textResult(`Unknown tool: ${name}`, true);
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    return textResult(`Tool ${name} failed: ${message}`, true);
  }
}
// MCP server instance: identity first, then the capabilities it declares.
const server = new Server(
  { name: "mcp-scrape-copilot", version: "0.1.0" },
  { capabilities: { resources: {}, tools: {} } }
);

// Tool discovery: always answers with the static TOOLS list.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  return { tools: TOOLS };
});

// Tool invocation: hands the name and arguments to handleToolCall, using an
// empty object when the request carries no arguments.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: toolArgs } = request.params;
  return handleToolCall(name, toolArgs ?? {});
});
/** Connects the server to a newly created stdio transport. */
async function runServer() {
  await server.connect(new StdioServerTransport());
}
// Start serving; a startup failure is logged to stderr.
runServer().catch(console.error);

// When stdin closes, shut the server down and release the browser.
process.stdin.on("close", () => {
  console.error("Puppeteer MCP Server closed");
  server.close().catch(console.error);
  // Nothing else in this file closes the browser launched by ensureBrowser,
  // so close it here; a browser that was never launched is skipped.
  browser?.close().catch(console.error);
});