crawl
Extract content from any URL. Retrieve parsed text and structured data for further use.
Instructions
Extract content from URL
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| url | Yes | URL to crawl | — |
Implementation Reference
- src/tools/crawl.ts:10-50 (handler): The main handler function for the crawl tool. Validates args, makes an API request to the CRAWL endpoint, and returns the crawled results as JSON.
export async function handleCrawl(args: unknown, apiKey?: string) { if (!isValidCrawlArgs(args)) { throw new McpError( ErrorCode.InvalidParams, "Invalid crawl arguments" ); } const { url } = args; log("Starting crawl for:", url); try { const startTime = Date.now(); const response = await makeRequest<CrawlResponse>( API_CONFIG.ENDPOINTS.CRAWL, { url }, apiKey ); const endTime = Date.now(); log(`Crawl completed successfully in ${endTime - startTime}ms`); return { content: [{ type: "text", mimeType: "application/json", text: JSON.stringify(response.results, null, 2) }] }; } catch (error) { log("Crawl error:", error); return { content: [{ type: "text", mimeType: "text/plain", text: `Crawl API error: ${formatError(error)}` }], isError: true }; } } - src/types.ts:110-121 (schema)Type definitions: CrawlResult (title, link, content), CrawlResponse (crawlParameters + results), and CrawlArgs (url).
export interface CrawlResult { title: string; link: string; content: string; } export interface CrawlResponse { crawlParameters: { url: string; }; results: CrawlResult; } - src/types.ts:127-139 (schema)Validation function isValidCrawlArgs that checks args is a non-null object with a non-empty url string.
export function isValidCrawlArgs(args: unknown): args is CrawlArgs { if (typeof args !== 'object' || args === null) { return false; } const { url } = args as CrawlArgs; if (typeof url !== 'string' || url.trim().length === 0) { return false; } return true; } - src/tools/index.ts:109-123 (registration)Tool registration: CRAWL_TOOL constant with name 'crawl', description 'Extract content from URL', and input schema requiring a url string.
// Crawl tool definition export const CRAWL_TOOL: Tool = { name: "crawl", description: "Extract content from URL", inputSchema: { type: "object", properties: { url: { type: "string", description: "URL to crawl" } }, required: ["url"] } }; - src/tools/handlers.ts:18-47 (registration)Dispatcher function handleToolCall that routes 'crawl' tool name to handleCrawl.
export async function handleToolCall(toolName: string, args: unknown, apiKey?: string) { log(`Handling tool call: ${toolName}`); switch (toolName) { case SEARCH_TOOL.name: return await handleSearch(args, apiKey); case CRAWL_TOOL.name: return await handleCrawl(args, apiKey); case SITEMAP_TOOL.name: return await handleSitemap(args, apiKey); case NEWS_TOOL.name: return await handleNews(args, apiKey); case REASONING_TOOL.name: return await handleReasoning(args, apiKey); case TRENDING_TOOL.name: return await handleTrending(args, apiKey); default: log(`Unknown tool: ${toolName}`); throw new McpError( ErrorCode.InvalidParams, `Unknown tool: ${toolName}` ); } } - src/config.ts:40-50 (helper)API config: ENDPOINTS.CRAWL is set to '/crawl' on the base API URL.
BASE_URL: 'https://api.search1api.com', DEFAULT_QUERY: 'latest news in the world', ENDPOINTS: { SEARCH: '/search', CRAWL: '/crawl', SITEMAP: '/sitemap', NEWS: '/news', REASONING: '/v1/chat/completions', TRENDING: '/trending' } } as const;