de en es ja ko ru zh

mcp-omnisearch

by spences10

TypeScript

MIT License

461

203

Overview InspectNew Endpoints Schema Related Servers Reviews Score

Need Help? · View Source Code · Report Issue

index.ts

index.ts•7.65 kB

import { http_json } from '../../../common/http.js';
import {
  ErrorType,
  ProcessingProvider,
  ProcessingResult,
  ProviderError,
} from '../../../common/types.js';
import {
  is_valid_url,
  retry_with_backoff,
  validate_api_key,
} from '../../../common/utils.js';
import { config } from '../../../config/env.js';

/**
 * Shape of the JSON body this provider expects back from the Firecrawl
 * endpoint configured at `config.processing.firecrawl_actions.base_url`.
 */
interface FirecrawlActionsResponse {
  success: boolean;
  data?: {
    markdown?: string;
    html?: string;
    rawHtml?: string;
    screenshot?: string;
    actions?: {
      screenshots?: string[];
    };
    metadata?: {
      title?: string;
      description?: string;
      language?: string;
      sourceURL?: string;
      statusCode?: number;
      error?: string;
      // Extra metadata keys are not read in this file, so they stay opaque.
      [key: string]: unknown;
    };
  };
  error?: string;
}

/** Kinds of page interaction this provider knows how to describe and send. */
type ActionType = 'click' | 'type' | 'scroll' | 'wait' | 'select';

/** Provider-internal description of a single page interaction. */
interface Action {
  type: ActionType;
  selector?: string;
  text?: string;
  x?: number;
  y?: number;
  duration?: number;
  value?: string;
}

/**
 * Builds the fixed interaction script for the requested depth.
 *
 * `basic` waits, scrolls once and waits again; `advanced` scrolls twice and
 * then attempts to click a "Read more" / "Show more" control.
 */
function build_default_actions(
  extract_depth: 'basic' | 'advanced',
): Action[] {
  if (extract_depth === 'advanced') {
    return [
      { type: 'wait', duration: 2000 }, // Wait for initial page load
      { type: 'scroll', duration: 1000 }, // Scroll down
      { type: 'wait', duration: 1000 }, // Wait for content to load
      { type: 'scroll', duration: 1000 }, // Scroll down more
      { type: 'wait', duration: 1000 }, // Wait for content to load
      // Click on "Read more" or "Show more" buttons if they exist.
      // NOTE(review): `:contains()` is a jQuery extension, not standard
      // CSS. Confirm that Firecrawl's selector engine accepts it; if it
      // does not, this click will fail in advanced mode.
      {
        type: 'click',
        selector:
          'button:contains("Read more"), button:contains("Show more"), a:contains("Read more"), a:contains("Show more")',
      },
      { type: 'wait', duration: 2000 }, // Wait for content to expand
    ];
  }

  return [
    { type: 'wait', duration: 2000 }, // Wait for initial page load
    { type: 'scroll', duration: 1000 }, // Scroll down once
    { type: 'wait', duration: 1000 }, // Wait for content to load
  ];
}

/**
 * Converts an internal {@link Action} into the object sent in the request's
 * `actions` array. Properties left `undefined` are dropped by
 * `JSON.stringify`, so optional fields are simply passed through.
 */
function to_firecrawl_action(action: Action): object {
  switch (action.type) {
    case 'wait':
      return {
        type: 'wait',
        // A missing (or zero) duration falls back to one second.
        milliseconds: action.duration || 1000,
        selector: action.selector,
      };
    case 'scroll':
      // Every scroll in the built-in scripts is meant to go down the page,
      // so state the direction explicitly instead of relying on a default.
      // `action.duration` is not forwarded for scrolls.
      return {
        type: 'scroll',
        direction: 'down',
      };
    case 'click':
      return {
        type: 'click',
        selector: action.selector,
        x: action.x,
        y: action.y,
      };
    case 'type':
      return {
        type: 'type',
        selector: action.selector,
        text: action.text || '',
      };
    case 'select':
      return {
        type: 'select',
        selector: action.selector,
        value: action.value || '',
      };
    default:
      // Unknown action kinds are forwarded unchanged.
      return action;
  }
}

/**
 * Renders one numbered, human-readable line for the "actions performed"
 * preamble that is prepended to the extracted content.
 *
 * @param action - The action to describe.
 * @param index - Zero-based position of the action; rendered one-based.
 */
function describe_action(action: Action, index: number): string {
  const step = index + 1;
  switch (action.type) {
    case 'click':
      return `${step}. Click on ${action.selector || `coordinates (${action.x}, ${action.y})`}`;
    case 'type':
      return `${step}. Type "${action.text}" ${action.selector ? `into ${action.selector}` : ''}`;
    case 'scroll':
      return `${step}. Scroll ${action.duration ? `for ${action.duration}ms` : ''}`;
    case 'wait':
      return `${step}. Wait ${action.duration ? `for ${action.duration}ms` : ''}`;
    case 'select':
      return `${step}. Select "${action.value}" from ${action.selector}`;
    default:
      return `${step}. Perform ${action.type} action`;
  }
}

/**
 * Processing provider that asks Firecrawl to run a short, fixed script of
 * page interactions (wait / scroll / click) before extracting the page
 * content, and returns that content prefixed with a summary of the script.
 */
export class FirecrawlActionsProvider implements ProcessingProvider {
  name = 'firecrawl_actions';
  description =
    'Support for page interactions (clicking, scrolling, etc.) before extraction for dynamic content using Firecrawl. Enables extraction from JavaScript-heavy sites, single-page applications, and content behind user interactions. Best for accessing content that requires navigation, form filling, or other interactions.';

  /**
   * Runs the interaction script against a single URL and returns the
   * extracted content.
   *
   * @param url - Target URL. When an array is given only its first element
   *   is used.
   * @param extract_depth - `'basic'` (default) or `'advanced'`; selects
   *   which built-in interaction script is sent.
   * @returns The extracted content (markdown preferred, then `html`, then
   *   `rawHtml`) preceded by a description of the actions, plus metadata
   *   including the word count and the screenshot field from the response.
   * @throws ProviderError `INVALID_INPUT` when no URL is supplied or the URL
   *   fails `is_valid_url`; `PROVIDER_ERROR` when the response reports an
   *   error or carries no data/content; `API_ERROR` for any other failure
   *   raised while performing the request.
   */
  async process_content(
    url: string | string[],
    extract_depth: 'basic' | 'advanced' = 'basic',
  ): Promise<ProcessingResult> {
    // Actions works with a single URL
    const actions_url = Array.isArray(url) ? url[0] : url;

    // An empty array yields `undefined`; reject it here rather than handing
    // a non-string to the validator.
    if (actions_url === undefined || !is_valid_url(actions_url)) {
      throw new ProviderError(
        ErrorType.INVALID_INPUT,
        `Invalid URL provided: ${actions_url}`,
        this.name,
      );
    }

    const actions_request = async (): Promise<ProcessingResult> => {
      // Validated outside the try block so a key error is not re-wrapped.
      const api_key = validate_api_key(
        config.processing.firecrawl_actions.api_key,
        this.name,
      );

      try {
        const actions = build_default_actions(extract_depth);

        const actions_data = await http_json<FirecrawlActionsResponse>(
          this.name,
          config.processing.firecrawl_actions.base_url,
          {
            method: 'POST',
            headers: {
              Authorization: `Bearer ${api_key}`,
              'Content-Type': 'application/json',
            },
            body: JSON.stringify({
              url: actions_url,
              formats: ['markdown', 'screenshot'],
              actions: actions.map(to_firecrawl_action),
            }),
            signal: AbortSignal.timeout(
              config.processing.firecrawl_actions.timeout,
            ),
          },
        );

        // Check if there was an error in the response
        if (!actions_data.success || actions_data.error) {
          throw new ProviderError(
            ErrorType.PROVIDER_ERROR,
            `Error performing actions: ${actions_data.error || 'Unknown error'}`,
            this.name,
          );
        }

        const data = actions_data.data;
        if (!data) {
          throw new ProviderError(
            ErrorType.PROVIDER_ERROR,
            'No data returned from API',
            this.name,
          );
        }

        // Prefer markdown, fallback to HTML, then rawHtml
        const content = data.markdown || data.html || data.rawHtml;
        if (!content) {
          throw new ProviderError(
            ErrorType.PROVIDER_ERROR,
            'No content extracted after performing actions',
            this.name,
          );
        }

        // Prefix the content with a summary of the actions performed
        const actions_description =
          `# Content from ${actions_url} after interactions\n\n` +
          `The following actions were performed before extraction:\n\n` +
          actions.map(describe_action).join('\n') +
          '\n\n---\n\n' +
          content;

        // The word count covers the preamble as well as the page content.
        const word_count = actions_description
          .split(/\s+/)
          .filter(Boolean).length;

        return {
          content: actions_description,
          raw_contents: [
            {
              url: actions_url,
              content: actions_description,
            },
          ],
          metadata: {
            title: `Content from ${actions_url} after interactions`,
            word_count,
            urls_processed: 1,
            successful_extractions: 1,
            extract_depth,
            screenshot: data.screenshot,
          },
          source_provider: this.name,
        };
      } catch (error) {
        // Errors already classified by this provider pass through untouched.
        if (error instanceof ProviderError) {
          throw error;
        }
        throw new ProviderError(
          ErrorType.API_ERROR,
          `Failed to perform actions: ${
            error instanceof Error ? error.message : 'Unknown error'
          }`,
          this.name,
        );
      }
    };

    return retry_with_backoff(actions_request);
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/spences10/mcp-omnisearch'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.