MCP Server for Crawl4AI

types.ts•8.57 KiB

// ---------------------------------------------------------------------------
// Module-private helper shapes.
//
// These are `type` aliases (not interfaces) on purpose: they replace object
// literal types that were previously written inline, and aliases of object
// literal types keep the same assignability (e.g. to Record<string, unknown>)
// that the inline forms had.
// ---------------------------------------------------------------------------

/** Object form of a proxy definition, shared by `proxy` and `proxy_config`. */
type ProxySettings = {
  server: string;
  username?: string;
  password?: string;
};

/** Outcome of running JavaScript on a page, as reported in responses. */
type JSExecutionResult = {
  success: boolean;
  results: unknown[];
};

/** One entry of the `images` / `videos` / `audios` lists in a crawl result. */
type MediaItem = {
  src?: string | null;
  data?: string;
  alt?: string | null;
  desc?: string;
  score?: number;
  type?: string;
  group_id?: number;
  format?: string | null;
  width?: number | null;
};

// ---------------------------------------------------------------------------
// Simple crawl options
// ---------------------------------------------------------------------------

/** Basic per-crawl options; also the base of {@link BatchCrawlOptions}. */
export interface CrawlOptions {
  remove_images?: boolean;
  bypass_cache?: boolean;
  /** Selects how `filter_list` is interpreted. */
  filter_mode?: 'blacklist' | 'whitelist';
  filter_list?: string[];
  screenshot?: boolean;
  wait_for?: string;
  timeout?: number;
}

// ---------------------------------------------------------------------------
// /execute_js endpoint
// ---------------------------------------------------------------------------

/** JavaScript to execute: a single script or a list of scripts. */
export interface JSExecuteOptions {
  js_code: string | string[];
  // Only url and js_code (scripts) are supported by /execute_js endpoint
}

/** Request payload for the /execute_js endpoint. */
export interface JSExecuteEndpointOptions {
  url: string;
  scripts: string | string[];
  // Only url and scripts are supported by /execute_js endpoint
}

/** Response shape for the /execute_js endpoint. */
export interface JSExecuteEndpointResponse {
  success: boolean;
  js_execution_result: JSExecutionResult;
  /** Either a plain markdown string or the structured markdown result. */
  markdown?: string | CrawlMarkdownResult;
}

// ---------------------------------------------------------------------------
// Screenshot endpoint
// ---------------------------------------------------------------------------

/** Request payload for the screenshot endpoint. */
export interface ScreenshotEndpointOptions {
  url: string;
  screenshot_wait_for?: number;
  save_to_directory?: string;
  // output_path is omitted to get base64 response
}

/** Response shape for the screenshot endpoint. */
export interface ScreenshotEndpointResponse {
  success: boolean;
  screenshot: string; // base64 encoded image
}

// ---------------------------------------------------------------------------
// /pdf endpoint
// ---------------------------------------------------------------------------

/** Request payload for the /pdf endpoint. */
export interface PDFEndpointOptions {
  url: string;
  // Only url is supported by /pdf endpoint
}

/** Response shape for the /pdf endpoint. */
export interface PDFEndpointResponse {
  success: boolean;
  pdf: string; // base64 encoded PDF
}

// ---------------------------------------------------------------------------
// /html endpoint
// ---------------------------------------------------------------------------

/** Request payload for the /html endpoint. */
export interface HTMLEndpointOptions {
  url: string;
  // Only url is supported by /html endpoint
}

/** Response shape for the /html endpoint. */
export interface HTMLEndpointResponse {
  html: string;
  url: string;
  success: boolean;
}

// ---------------------------------------------------------------------------
// Markdown endpoint
// ---------------------------------------------------------------------------

/** Content filter names accepted by the markdown endpoint's `f` parameter. */
export type FilterType = 'raw' | 'fit' | 'bm25' | 'llm';

/** Request parameters for the markdown endpoint. */
export interface MarkdownEndpointOptions {
  url: string;
  f?: FilterType; // Filter type: raw, fit (default), bm25, llm
  q?: string; // Query string for bm25/llm filters
  c?: string; // Cache-bust parameter
}

/** Response shape for the markdown endpoint. */
export interface MarkdownEndpointResponse {
  url: string;
  filter: string;
  query: string | null;
  cache: string;
  markdown: string;
  success: boolean;
}

// ---------------------------------------------------------------------------
// LLM endpoint
// ---------------------------------------------------------------------------

/** Request parameters for the LLM endpoint. */
export interface LLMEndpointOptions {
  url: string;
  query: string;
}

/** Response shape for the LLM endpoint. */
export interface LLMEndpointResponse {
  answer: string;
}

// ---------------------------------------------------------------------------
// Batch crawl
// ---------------------------------------------------------------------------

/** Options for crawling several URLs, on top of the basic {@link CrawlOptions}. */
export interface BatchCrawlOptions extends CrawlOptions {
  urls: string[];
  max_concurrent?: number;
  // New: Support per-URL configs array (0.7.3/0.7.4)
  configs?: Array<{
    url: string;
    browser_config?: BrowserConfig;
    crawler_config?: CrawlerConfig;
    extraction_strategy?: ExtractionStrategy;
    table_extraction_strategy?: TableExtractionStrategy;
    markdown_generator_options?: MarkdownGeneratorOptions;
    /** Either a string or a predicate that receives a URL. */
    matcher?: string | ((url: string) => boolean);
  }>;
}

// ---------------------------------------------------------------------------
// Browser configuration options
// ---------------------------------------------------------------------------

/** Browser configuration options. */
export interface BrowserConfig {
  browser_type?: 'chromium' | 'firefox' | 'webkit' | 'undetected';
  headless?: boolean;
  viewport_width?: number;
  viewport_height?: number;
  user_agent?: string;
  // Unified proxy config - accepts string or object format (new in 0.7.3/0.7.4)
  proxy?: string | ProxySettings;
  // Legacy field kept for backward compatibility
  proxy_config?: ProxySettings;
  cookies?: Array<{
    name: string;
    value: string;
    domain: string;
    path?: string;
  }>;
  headers?: Record<string, string>;
  extra_args?: string[];
}

/** Virtual scroll configuration for sites like Twitter/Instagram. */
export interface VirtualScrollConfig {
  container_selector: string;
  scroll_count?: number;
  scroll_by?: string | number;
  wait_after_scroll?: number;
}

// ---------------------------------------------------------------------------
// Crawler configuration options
// ---------------------------------------------------------------------------

/** Crawler configuration options, grouped by concern. */
export interface CrawlerConfig {
  // Content filtering
  word_count_threshold?: number;
  excluded_tags?: string[];
  excluded_selector?: string;
  remove_overlay_elements?: boolean;
  only_text?: boolean;
  remove_forms?: boolean;
  keep_data_attributes?: boolean;

  // JavaScript execution
  js_code?: string | string[];
  js_only?: boolean;
  wait_for?: string;
  wait_for_timeout?: number;

  // Page navigation & timing
  wait_until?: 'domcontentloaded' | 'networkidle' | 'load';
  page_timeout?: number;
  wait_for_images?: boolean;
  ignore_body_visibility?: boolean;

  // Dynamic content handling
  delay_before_scroll?: number;
  scroll_delay?: number;
  scan_full_page?: boolean;
  virtual_scroll_config?: VirtualScrollConfig;

  // Content processing
  process_iframes?: boolean;
  exclude_external_links?: boolean;

  // Media handling
  screenshot?: boolean;
  screenshot_wait_for?: number;
  pdf?: boolean;
  capture_mhtml?: boolean;
  image_description_min_word_threshold?: number;
  image_score_threshold?: number;
  exclude_external_images?: boolean;

  // Link filtering
  exclude_social_media_links?: boolean;
  exclude_domains?: string[];

  // Page interaction
  simulate_user?: boolean;
  override_navigator?: boolean;
  magic?: boolean;

  // Session management
  session_id?: string;

  // Cache control
  cache_mode?: 'ENABLED' | 'BYPASS' | 'DISABLED';

  // Performance options
  timeout?: number;
  verbose?: boolean;

  // Debug
  log_console?: boolean;

  // New parameters from 0.7.3/0.7.4
  delay_before_return_html?: number; // Delay in ms before capturing final HTML
  css_selector?: string; // CSS selector to extract specific elements
  include_links?: boolean; // Whether to include links in the response
  resolve_absolute_urls?: boolean; // Convert relative URLs to absolute ones
}

// ---------------------------------------------------------------------------
// Extraction strategy passthrough objects (new in 0.7.3/0.7.4)
// ---------------------------------------------------------------------------

/** Passthrough extraction strategy object; any keys are accepted. */
export interface ExtractionStrategy {
  [key: string]: unknown;
}

/** Passthrough table-extraction strategy; extra keys are accepted. */
export interface TableExtractionStrategy {
  enable_chunking?: boolean;
  thresholds?: Record<string, unknown>;
  [key: string]: unknown;
}

/** Passthrough markdown generator options; extra keys are accepted. */
export interface MarkdownGeneratorOptions {
  include_links?: boolean;
  [key: string]: unknown;
}

/** Advanced crawl configuration combining browser and crawler configs. */
export interface AdvancedCrawlConfig {
  url?: string;
  urls?: string[];
  browser_config?: BrowserConfig;
  crawler_config?: CrawlerConfig;
  priority?: number;
  extraction_strategy?: ExtractionStrategy;
  table_extraction_strategy?: TableExtractionStrategy;
  markdown_generator_options?: MarkdownGeneratorOptions;
}

// ---------------------------------------------------------------------------
// Session management types (used internally by MCP server)
// ---------------------------------------------------------------------------

/** Bookkeeping record for one session. */
export interface SessionInfo {
  id: string;
  created_at: Date;
  last_used: Date;
  initial_url?: string;
  metadata?: Record<string, unknown>;
}

// ---------------------------------------------------------------------------
// Crawl endpoint types
// ---------------------------------------------------------------------------

/** Request payload for the crawl endpoint. */
export interface CrawlEndpointOptions {
  urls: string[];
  browser_config?: BrowserConfig;
  crawler_config?: CrawlerConfig;
}

/** Markdown variants included in a crawl result. */
export interface CrawlMarkdownResult {
  raw_markdown: string;
  markdown_with_citations: string;
  references_markdown: string;
  fit_markdown: string;
  fit_html: string;
}

/** Media entries included in a crawl result, split by media kind. */
export interface CrawlMediaResult {
  images: MediaItem[];
  videos: MediaItem[];
  audios: MediaItem[];
}

/** One link entry of a crawl result (module-private). */
interface LinkItem {
  href: string;
  text: string;
  title: string;
  base_domain?: string | null;
  head_data?: Record<string, unknown> | null;
  head_extraction_status?: string | null;
  head_extraction_error?: string | null;
  intrinsic_score?: number;
  contextual_score?: number | null;
  total_score?: number | null;
}

/** Links included in a crawl result, split into internal and external. */
export interface CrawlLinksResult {
  internal: LinkItem[];
  external: LinkItem[];
}

/** A single per-URL entry in {@link CrawlEndpointResponse.results}. */
export interface CrawlResultItem {
  url: string;
  html: string;
  cleaned_html: string;
  fit_html: string;
  success: boolean;
  error_message?: string;
  status_code: number;
  response_headers: Record<string, unknown>;
  redirected_url?: string;
  session_id: string | null;
  metadata: Record<string, unknown>;
  links: CrawlLinksResult;
  media: CrawlMediaResult;
  markdown: CrawlMarkdownResult;
  tables: unknown[];
  extracted_content: unknown | null;
  screenshot: string | null; // base64 PNG when screenshot: true
  pdf: string | null; // base64 PDF when pdf: true
  mhtml: string | null;
  js_execution_result: JSExecutionResult | null;
  downloaded_files: unknown | null;
  network_requests: unknown | null;
  console_messages: unknown | null;
  ssl_certificate: unknown | null;
  dispatch_result: unknown | null;
}

/** Response shape for the crawl endpoint. */
export interface CrawlEndpointResponse {
  success: boolean;
  results: CrawlResultItem[];
  server_processing_time_s: number;
  server_memory_delta_mb: number;
  server_peak_memory_mb: number;
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/omgwtfwow/mcp-crawl4ai-ts'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

types.ts•8.57 KiB