Skip to main content
Glama

MCP Server for Crawl4AI

by omgwtfwow
types.ts (8.77 kB)
/** Basic crawl options; also the base of {@link BatchCrawlOptions}. */
export interface CrawlOptions {
  remove_images?: boolean;
  bypass_cache?: boolean;
  filter_mode?: 'blacklist' | 'whitelist';
  filter_list?: string[];
  screenshot?: boolean;
  wait_for?: string;
  timeout?: number;
}

/** JavaScript execution options: one script or a list of scripts. */
export interface JSExecuteOptions {
  js_code: string | string[];
  // Only url and js_code (scripts) are supported by /execute_js endpoint
}

/** Request options for the /execute_js endpoint. */
export interface JSExecuteEndpointOptions {
  url: string;
  scripts: string | string[];
  // Only url and scripts are supported by /execute_js endpoint
}

/** Response shape of the /execute_js endpoint. */
export interface JSExecuteEndpointResponse {
  success: boolean;
  js_execution_result: {
    success: boolean;
    results: unknown[];
  };
  markdown?: string | CrawlMarkdownResult;
}

/** Request options for the screenshot endpoint. */
export interface ScreenshotEndpointOptions {
  url: string;
  screenshot_wait_for?: number;
  save_to_directory?: string;
  // output_path is omitted to get base64 response
}

/** Response shape of the screenshot endpoint. */
export interface ScreenshotEndpointResponse {
  success: boolean;
  screenshot: string; // base64 encoded image
}

/** Request options for the /pdf endpoint. */
export interface PDFEndpointOptions {
  url: string;
  // Only url is supported by /pdf endpoint
}

/** Response shape of the /pdf endpoint. */
export interface PDFEndpointResponse {
  success: boolean;
  pdf: string; // base64 encoded PDF
}

/** Request options for the /html endpoint. */
export interface HTMLEndpointOptions {
  url: string;
  // Only url is supported by /html endpoint
}

/** Response shape of the /html endpoint. */
export interface HTMLEndpointResponse {
  html: string;
  url: string;
  success: boolean;
}

/** Markdown filter selector accepted by {@link MarkdownEndpointOptions.f}. */
export type FilterType = 'raw' | 'fit' | 'bm25' | 'llm';

/** Request options for the markdown endpoint. */
export interface MarkdownEndpointOptions {
  url: string;
  f?: FilterType; // Filter type: raw, fit (default), bm25, llm
  q?: string; // Query string for bm25/llm filters
  c?: string; // Cache-bust parameter
}

/** Response shape of the markdown endpoint. */
export interface MarkdownEndpointResponse {
  url: string;
  filter: string;
  query: string | null;
  cache: string;
  markdown: string;
  success: boolean;
}

/** Request options for the LLM endpoint: a URL plus a query. */
export interface LLMEndpointOptions {
  url: string;
  query: string;
}

/** Response shape of the LLM endpoint. */
export interface LLMEndpointResponse {
  answer: string;
}

/** Options for crawling several URLs in one request. */
export interface BatchCrawlOptions extends CrawlOptions {
  urls: string[];
  max_concurrent?: number;
  // New: Support per-URL configs array (0.7.3/0.7.4)
  configs?: Array<{
    url: string;
    browser_config?: BrowserConfig;
    crawler_config?: CrawlerConfig;
    extraction_strategy?: ExtractionStrategy;
    table_extraction_strategy?: TableExtractionStrategy;
    markdown_generator_options?: MarkdownGeneratorOptions;
    matcher?: string | ((url: string) => boolean);
  }>;
}

// Browser configuration options
export interface BrowserConfig {
  browser_type?: 'chromium' | 'firefox' | 'webkit' | 'undetected';
  headless?: boolean;
  viewport_width?: number;
  viewport_height?: number;
  user_agent?: string;
  // Unified proxy config - accepts string or object format (new in 0.7.3/0.7.4)
  proxy?:
    | string
    | {
        server: string;
        username?: string;
        password?: string;
      };
  // Legacy field kept for backward compatibility
  proxy_config?: {
    server: string;
    username?: string;
    password?: string;
  };
  cookies?: Array<{
    name: string;
    value: string;
    domain: string;
    path?: string;
  }>;
  headers?: Record<string, string>;
  extra_args?: string[];
}

// Virtual scroll configuration for sites like Twitter/Instagram
export interface VirtualScrollConfig {
  container_selector: string;
  scroll_count?: number;
  scroll_by?: string | number;
  wait_after_scroll?: number;
}

// Crawler configuration options
export interface CrawlerConfig {
  // Content filtering
  word_count_threshold?: number;
  excluded_tags?: string[];
  excluded_selector?: string;
  remove_overlay_elements?: boolean;
  only_text?: boolean;
  remove_forms?: boolean;
  keep_data_attributes?: boolean;

  // JavaScript execution
  js_code?: string | string[];
  js_only?: boolean;
  wait_for?: string;
  wait_for_timeout?: number;

  // Page navigation & timing
  wait_until?: 'domcontentloaded' | 'networkidle' | 'load';
  page_timeout?: number;
  wait_for_images?: boolean;
  ignore_body_visibility?: boolean;

  // Dynamic content handling
  delay_before_scroll?: number;
  scroll_delay?: number;
  scan_full_page?: boolean;
  virtual_scroll_config?: VirtualScrollConfig;

  // Content processing
  process_iframes?: boolean;
  exclude_external_links?: boolean;

  // Media handling
  screenshot?: boolean;
  screenshot_wait_for?: number;
  pdf?: boolean;
  capture_mhtml?: boolean;
  image_description_min_word_threshold?: number;
  image_score_threshold?: number;
  exclude_external_images?: boolean;

  // Link filtering
  exclude_social_media_links?: boolean;
  exclude_domains?: string[];

  // Page interaction
  simulate_user?: boolean;
  override_navigator?: boolean;
  magic?: boolean;

  // Session management
  session_id?: string;

  // Cache control
  cache_mode?: 'ENABLED' | 'BYPASS' | 'DISABLED';

  // Performance options
  timeout?: number;
  verbose?: boolean;

  // Debug
  log_console?: boolean;

  // New parameters from 0.7.3/0.7.4
  delay_before_return_html?: number; // Delay in ms before capturing final HTML
  css_selector?: string; // CSS selector to extract specific elements
  include_links?: boolean; // Whether to include links in the response
  resolve_absolute_urls?: boolean; // Convert relative URLs to absolute ones
}

// Extraction strategy passthrough objects (new in 0.7.3/0.7.4)
export interface ExtractionStrategy {
  [key: string]: unknown;
}

export interface TableExtractionStrategy {
  enable_chunking?: boolean;
  thresholds?: Record<string, unknown>;
  [key: string]: unknown;
}

export interface MarkdownGeneratorOptions {
  include_links?: boolean;
  [key: string]: unknown;
}

// Advanced crawl configuration combining browser and crawler configs
export interface AdvancedCrawlConfig {
  url?: string;
  urls?: string[];
  browser_config?: BrowserConfig;
  crawler_config?: CrawlerConfig;
  priority?: number;
  extraction_strategy?: ExtractionStrategy;
  table_extraction_strategy?: TableExtractionStrategy;
  markdown_generator_options?: MarkdownGeneratorOptions;
}

// Session management types (used internally by MCP server)
export interface SessionInfo {
  id: string;
  created_at: Date;
  last_used: Date;
  initial_url?: string;
  metadata?: Record<string, unknown>;
}

// Crawl endpoint types
export interface CrawlEndpointOptions {
  urls: string[];
  browser_config?: BrowserConfig;
  crawler_config?: CrawlerConfig;
}

/** Markdown variants returned for a crawled page. */
export interface CrawlMarkdownResult {
  raw_markdown: string;
  markdown_with_citations: string;
  references_markdown: string;
  fit_markdown: string;
  fit_html: string;
}

/**
 * One media entry. The image, video and audio lists of
 * {@link CrawlMediaResult} all share this shape, so it is declared once.
 */
interface MediaItem {
  src?: string | null;
  data?: string;
  alt?: string | null;
  desc?: string;
  score?: number;
  type?: string;
  group_id?: number;
  format?: string | null;
  width?: number | null;
}

/** Media found on a crawled page, grouped by kind. */
export interface CrawlMediaResult {
  images: MediaItem[];
  videos: MediaItem[];
  audios: MediaItem[];
}

/** One link entry inside {@link CrawlLinksResult}. */
interface LinkItem {
  href: string;
  text: string;
  title: string;
  base_domain?: string | null;
  head_data?: Record<string, unknown> | null;
  head_extraction_status?: string | null;
  head_extraction_error?: string | null;
  intrinsic_score?: number;
  contextual_score?: number | null;
  total_score?: number | null;
}

/** Links found on a crawled page, split into internal and external lists. */
export interface CrawlLinksResult {
  internal: LinkItem[];
  external: LinkItem[];
}

/** A single entry of {@link CrawlEndpointResponse.results}. */
export interface CrawlResultItem {
  url: string;
  html: string;
  cleaned_html: string;
  fit_html: string;
  success: boolean;
  error_message?: string;
  status_code: number;
  response_headers: Record<string, unknown>;
  redirected_url?: string;
  session_id: string | null;
  metadata: Record<string, unknown>;
  links: CrawlLinksResult;
  media: CrawlMediaResult;
  markdown: CrawlMarkdownResult;
  tables: unknown[];
  extracted_content: unknown | null;
  screenshot: string | null; // base64 PNG when screenshot: true
  pdf: string | null; // base64 PDF when pdf: true
  mhtml: string | null;
  js_execution_result: {
    success: boolean;
    results: unknown[];
  } | null;
  downloaded_files: unknown | null;
  network_requests: unknown | null;
  console_messages: unknown | null;
  ssl_certificate: unknown | null;
  dispatch_result: unknown | null;
}

/** Response shape of the crawl endpoint. */
export interface CrawlEndpointResponse {
  success: boolean;
  results: CrawlResultItem[];
  server_processing_time_s: number;
  server_memory_delta_mb: number;
  server_peak_memory_mb: number;
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/omgwtfwow/mcp-crawl4ai-ts'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.