Skip to main content
Glama
tanevanwifferen

DocsScraper

index.ts (8.14 kB)
#!/usr/bin/env node

/**
 * DocsScraper MCP Server
 *
 * This MCP server connects to the DocsScraper web API to provide semantic search capabilities.
 * It interfaces with the /api/chunks/search endpoint to search through documentation chunks
 * using embeddings and AI validation. The server provides tools for searching documentation
 * and can fall back to scraper sources when no local results are found.
 */

import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
  CallToolRequestSchema,
  ListResourcesRequestSchema,
  ListToolsRequestSchema,
  ReadResourceRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";
import axios from "axios";

/**
 * Configuration from environment variables
 */
const API_BASE_URL = process.env.DOCS_SCRAPER_BASE_URL || "https://api.mcpdocsscraper.click";
const API_KEY = process.env.DOCS_SCRAPER_API_KEY;

if (!API_KEY) {
  console.error("DOCS_SCRAPER_API_KEY environment variable is required");
  process.exit(1);
}

/** Full URL of the search endpoint this server proxies. */
const SEARCH_ENDPOINT = `${API_BASE_URL}/api/chunks/search`;

/** URI of the single informational resource exposed by this server. */
const INFO_RESOURCE_URI = "docs-scraper://api/info";

/** Bounds and default for the `top` tool argument (mirrors the tool's input schema). */
const DEFAULT_TOP = 5;
const MIN_TOP = 1;
const MAX_TOP = 10;

/** HTTP timeout for search requests, in milliseconds (180 seconds). */
const REQUEST_TIMEOUT_MS = 180000;

/**
 * Type definitions for API responses
 */
interface DocsChunk {
  id: number;
  service: string;
  url: string;
  oneLiner: string;
  fullContent: string;
  embeddingVector?: number[];
}

interface SearchResult {
  chunk: DocsChunk;
  score: number;
  source?: string;
}

interface SearchResponse {
  results?: SearchResult[];
  message?: string;
}

/**
 * Converts the raw `top` argument into an integer within [MIN_TOP, MAX_TOP].
 *
 * Missing, falsy, or non-numeric values fall back to DEFAULT_TOP so that NaN is
 * never forwarded to the API; fractional values are truncated.
 */
function normalizeTop(raw: unknown): number {
  const requested = Number(raw || DEFAULT_TOP);
  if (Number.isNaN(requested)) {
    return DEFAULT_TOP;
  }
  return Math.max(MIN_TOP, Math.min(MAX_TOP, Math.trunc(requested)));
}

/**
 * Pulls the result list out of a response body.
 *
 * Accepts a bare array, or an object carrying a `results` array (the
 * SearchResponse shape declared above). Anything else yields an empty list.
 * NOTE(review): element shape is trusted, not validated — confirm against the API.
 */
function extractResults(data: unknown): SearchResult[] {
  if (Array.isArray(data)) {
    return data;
  }
  if (typeof data === "object" && data !== null) {
    const wrapped = (data as SearchResponse).results;
    if (Array.isArray(wrapped)) {
      return wrapped;
    }
  }
  return [];
}

/**
 * Renders one search result as a text section.
 *
 * The score is shown whenever it is a finite number (including 0); the source
 * tag is shown only when a non-empty source is present.
 */
function formatResult(result: SearchResult, index: number): string {
  const { chunk } = result;
  const score =
    typeof result.score === "number" && Number.isFinite(result.score)
      ? ` (Score: ${result.score.toFixed(3)})`
      : "";
  const source = result.source ? ` [Source: ${result.source}]` : "";

  return [
    `**Result ${index + 1}${score}${source}**`,
    `Service: ${chunk.service}`,
    `URL: ${chunk.url}`,
    `Summary: ${chunk.oneLiner}`,
    "",
    "Content:",
    `${chunk.fullContent}`,
    "",
    "---",
  ].join("\n");
}

/**
 * Maps a failure raised while searching to an Error with a user-facing message.
 *
 * Axios errors are translated by HTTP status / error code; anything else is
 * wrapped as a generic "Search failed" error.
 */
function toSearchError(error: unknown): Error {
  if (!axios.isAxiosError(error)) {
    return new Error(`Search failed: ${error instanceof Error ? error.message : "Unknown error"}`);
  }

  const response = error.response;
  const status = response?.status;

  if (status === 401) {
    return new Error("Authentication failed. Please check your API key.");
  }
  if (status === 400) {
    return new Error(`Bad request: ${response?.data?.message || "Invalid query parameters"}`);
  }
  if (status === 404) {
    return new Error("DocsScraper API endpoint not found. Please check the base URL.");
  }
  if (error.code === "ECONNREFUSED") {
    return new Error(
      `Cannot connect to DocsScraper API at ${API_BASE_URL}. Please ensure the service is running.`
    );
  }

  // Only mention the status when a response was actually received; otherwise the
  // message would read "undefined ..." for timeouts and other network failures.
  const statusPrefix = status !== undefined ? `${status} ` : "";
  const detail = response?.statusText || error.message;
  return new Error(`API request failed: ${statusPrefix}${detail}`);
}

/**
 * Calls the DocsScraper search endpoint and returns the text shown to the client.
 *
 * @param query   Search query, sent as-is.
 * @param top     Maximum number of results, already normalized.
 * @param service Service filter; omitted from the request when empty.
 * @throws Error with a user-facing message when the request or formatting fails.
 */
async function searchDocs(query: string, top: number, service: string): Promise<string> {
  try {
    const params: Record<string, string | number> = { query, top };
    if (service) {
      params.service = service;
    }

    const response = await axios.get<unknown>(SEARCH_ENDPOINT, {
      params,
      headers: {
        "X-API-Key": API_KEY,
        "Content-Type": "application/json",
      },
      timeout: REQUEST_TIMEOUT_MS,
    });

    const results = extractResults(response.data);
    if (results.length === 0) {
      return `No documentation chunks found for query: "${query}". The search may have found no relevant results, or the scraper sources may not have returned any matches.`;
    }

    const formattedResults = results.map(formatResult).join("\n\n");
    return `Found ${results.length} relevant documentation chunk(s) for query: "${query}"\n\n${formattedResults}`;
  } catch (error) {
    throw toSearchError(error);
  }
}

/**
 * Create an MCP server with capabilities for tools (to search documentation).
 * This server connects to the DocsScraper web API to provide semantic search functionality.
 */
const server = new Server(
  {
    name: "docs-scraper-server",
    version: "0.2.0",
  },
  {
    capabilities: {
      tools: {},
      resources: {},
    },
  }
);

/**
 * Handler that lists available tools.
 * Exposes a "search_docs" tool that lets clients search through documentation chunks.
 */
server.setRequestHandler(ListToolsRequestSchema, async () => {
  return {
    tools: [
      {
        name: "search_docs",
        description:
          "Search through documentation chunks using semantic search. " +
          "Make sure your query is specific to get the best results. " +
          "Forgetting to add 'api' to the query will return ui results etc.",
        inputSchema: {
          type: "object",
          properties: {
            query: {
              type: "string",
              description: "The search query to find relevant documentation",
            },
            top: {
              type: "number",
              description: "Maximum number of results to return (1-10, default: 5)",
              minimum: MIN_TOP,
              maximum: MAX_TOP,
              default: DEFAULT_TOP,
            },
            service: {
              type: "string",
              description: "Service name to filter results by (case-insensitive)",
            },
          },
          required: ["query", "service"],
        },
      },
    ],
  };
});

/**
 * Handler for the search_docs tool.
 * Validates the arguments, then makes a request to the DocsScraper web API to
 * search for documentation chunks. Throws for an empty query or an unknown tool.
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  switch (request.params.name) {
    case "search_docs": {
      const args = request.params.arguments ?? {};
      const query = String(args.query ?? "");
      if (!query.trim()) {
        throw new Error("Query parameter is required and cannot be empty");
      }

      const top = normalizeTop(args.top);
      // The handler tolerates a missing service even though the schema marks it required.
      const service = args.service ? String(args.service).trim() : "";

      const text = await searchDocs(query, top, service);
      return {
        content: [{ type: "text" as const, text }],
      };
    }

    default:
      throw new Error("Unknown tool");
  }
});

/**
 * Handler for listing available resources.
 * Provides information about the connected DocsScraper API.
 */
server.setRequestHandler(ListResourcesRequestSchema, async () => {
  return {
    resources: [
      {
        uri: INFO_RESOURCE_URI,
        mimeType: "text/plain",
        name: "DocsScraper API Info",
        description: "Information about the connected DocsScraper API",
      },
    ],
  };
});

/**
 * Handler for reading the contents of a specific resource.
 * Provides information about the DocsScraper API connection.
 *
 * The URI is compared as a string: parsing "docs-scraper://api/info" with URL
 * yields host "api" and pathname "/info", so a pathname check against
 * "/api/info" can never match.
 */
server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
  const { uri } = request.params;
  if (uri !== INFO_RESOURCE_URI) {
    throw new Error(`Resource not found: ${uri}`);
  }

  const text = [
    "DocsScraper API Connection Info:",
    `- Base URL: ${API_BASE_URL}`,
    `- API Key: ${API_KEY ? "***configured***" : "NOT SET"}`,
    `- Endpoint: ${SEARCH_ENDPOINT}`,
    "- Authentication: API Key via X-API-Key header",
    "",
    "This MCP server connects to your DocsScraper web application to provide semantic search capabilities through documentation chunks. The search uses embeddings and AI validation to find the most relevant results.",
  ].join("\n");

  return {
    contents: [
      {
        uri,
        mimeType: "text/plain",
        text,
      },
    ],
  };
});

/**
 * Start the server using stdio transport.
 * This allows the server to communicate via standard input/output streams.
 */
async function main(): Promise<void> {
  const transport = new StdioServerTransport();
  await server.connect(transport);
}

main().catch((error: unknown) => {
  console.error("Server error:", error);
  process.exit(1);
});

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tanevanwifferen/DocsScraperMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.