de en es ja ko ru zh

mcp-omnisearch

by spences10

TypeScript

MIT License

461

203

Overview InspectNew Endpoints Schema Related Servers Reviews Score

Need Help? View Source Code · Report Issue

index.ts

index.ts•5.6 kB

import { http_json } from '../../../common/http.js';
import {
  ErrorType,
  ProcessingProvider,
  ProcessingResult,
  ProviderError,
} from '../../../common/types.js';
import {
  is_valid_url,
  retry_with_backoff,
  validate_api_key,
} from '../../../common/utils.js';
import { config } from '../../../config/env.js';

/**
 * Shape of the JSON body this provider reads from the Firecrawl scrape
 * endpoint. Only `success`, `error`, `data.markdown`, `data.html`,
 * `data.rawHtml` and `data.metadata` are consumed below; the remaining
 * fields are declared for completeness.
 */
interface FirecrawlScrapeResponse {
  success: boolean;
  data?: {
    markdown?: string;
    html?: string;
    rawHtml?: string;
    screenshot?: string;
    links?: string[];
    metadata?: {
      title?: string;
      description?: string;
      language?: string;
      sourceURL?: string;
      statusCode?: number;
      error?: string;
      // Unlisted keys are not interpreted here, so they stay `unknown`.
      [key: string]: unknown;
    };
    llm_extraction?: unknown;
    warning?: string;
  };
  error?: string;
}

/** Metadata object attached to a scraped page in the response. */
type FirecrawlMetadata = NonNullable<
  NonNullable<FirecrawlScrapeResponse['data']>['metadata']
>;

/** Outcome for a URL whose content was extracted. */
interface ScrapeSuccess {
  success: true;
  url: string;
  content: string;
  metadata: FirecrawlMetadata | undefined;
}

/** Outcome for a URL whose scrape failed; `error` holds the reason. */
interface ScrapeFailure {
  success: false;
  url: string;
  content: string;
  error: string;
}

type ScrapeOutcome = ScrapeSuccess | ScrapeFailure;

/**
 * Processing provider that scrapes one or more URLs through Firecrawl and
 * returns their content combined into a single result.
 */
export class FirecrawlScrapeProvider implements ProcessingProvider {
  name = 'firecrawl_scrape';
  description =
    'Extract clean, LLM-ready data from single URLs with enhanced formatting options using Firecrawl. Efficiently converts web content into markdown, plain text, or structured data with configurable extraction options. Best for content analysis, data collection, and AI training data preparation.';

  /**
   * Scrape every given URL and merge the extracted content.
   *
   * URLs are requested concurrently. A failure on one URL does not abort
   * the others: failed URLs are reported in `metadata.failed_urls` as long
   * as at least one URL succeeds.
   *
   * @param url - A single URL or a list of URLs to scrape.
   * @param extract_depth - `'advanced'` sends `waitFor: 5000`, `'basic'`
   *   sends `waitFor: 2000` (presumably milliseconds — confirm against the
   *   Firecrawl API reference).
   * @returns Combined content (joined with blank lines), the per-URL raw
   *   contents, and summary metadata.
   * @throws ProviderError with `INVALID_INPUT` when no URL is supplied or a
   *   URL fails `is_valid_url`; with `PROVIDER_ERROR` when every URL fails;
   *   with `API_ERROR` for any other unexpected failure.
   */
  async process_content(
    url: string | string[],
    extract_depth: 'basic' | 'advanced' = 'basic',
  ): Promise<ProcessingResult> {
    const urls = Array.isArray(url) ? url : [url];

    // An empty list would otherwise surface as "Failed to extract content
    // from all URLs" from inside the retried request, which is misleading.
    if (urls.length === 0) {
      throw new ProviderError(
        ErrorType.INVALID_INPUT,
        'No URLs provided',
        this.name,
      );
    }

    // Validate all URLs before issuing any request.
    for (const u of urls) {
      if (!is_valid_url(u)) {
        throw new ProviderError(
          ErrorType.INVALID_INPUT,
          `Invalid URL provided: ${u}`,
          this.name,
        );
      }
    }

    const scrape_request = async () => {
      const api_key = validate_api_key(
        config.processing.firecrawl_scrape.api_key,
        this.name,
      );

      try {
        // Process each URL and collect results; scrape_single_url never
        // rejects, so one bad URL cannot fail the whole batch.
        const results = await Promise.all(
          urls.map((single_url) =>
            this.scrape_single_url(single_url, api_key, extract_depth),
          ),
        );

        const successful_results = results.filter(
          (r): r is ScrapeSuccess => r.success,
        );
        const failures = results.filter(
          (r): r is ScrapeFailure => !r.success,
        );
        const failed_urls = failures.map((r) => r.url);

        // If all URLs failed, throw and include each URL's reason so the
        // caller can tell what went wrong.
        if (successful_results.length === 0) {
          const reasons = failures
            .map((f) => `${f.url}: ${f.error}`)
            .join('; ');
          throw new ProviderError(
            ErrorType.PROVIDER_ERROR,
            `Failed to extract content from all URLs (${reasons})`,
            this.name,
          );
        }

        // Map results to raw_contents array
        const raw_contents = successful_results.map((result) => ({
          url: result.url,
          content: result.content,
        }));

        // Combine all results into a single content string
        const combined_content = raw_contents
          .map((result) => result.content)
          .join('\n\n');

        // Count whitespace-separated tokens; filter(Boolean) drops the
        // empty strings produced by leading/trailing whitespace.
        const word_count = combined_content
          .split(/\s+/)
          .filter(Boolean).length;

        // Get title from first successful result if available
        const title = successful_results[0]?.metadata?.title;

        return {
          content: combined_content,
          raw_contents,
          metadata: {
            title,
            word_count,
            failed_urls: failed_urls.length > 0 ? failed_urls : undefined,
            urls_processed: urls.length,
            successful_extractions: successful_results.length,
            extract_depth,
          },
          source_provider: this.name,
        };
      } catch (error) {
        if (error instanceof ProviderError) {
          throw error;
        }
        throw new ProviderError(
          ErrorType.API_ERROR,
          `Failed to extract content: ${
            error instanceof Error ? error.message : 'Unknown error'
          }`,
          this.name,
        );
      }
    };

    return retry_with_backoff(scrape_request);
  }

  /**
   * Scrape a single URL. Never rejects: any error is logged and returned
   * as a `ScrapeFailure` so sibling URLs keep processing.
   */
  private async scrape_single_url(
    single_url: string,
    api_key: string,
    extract_depth: 'basic' | 'advanced',
  ): Promise<ScrapeOutcome> {
    try {
      const data = await http_json<FirecrawlScrapeResponse>(
        this.name,
        config.processing.firecrawl_scrape.base_url,
        {
          method: 'POST',
          headers: {
            Authorization: `Bearer ${api_key}`,
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({
            url: single_url,
            formats: ['markdown'],
            onlyMainContent: true,
            waitFor: extract_depth === 'advanced' ? 5000 : 2000,
          }),
          signal: AbortSignal.timeout(
            config.processing.firecrawl_scrape.timeout,
          ),
        },
      );

      // Check if there was an error in the response
      if (!data.success || data.error) {
        throw new ProviderError(
          ErrorType.PROVIDER_ERROR,
          `Error scraping URL: ${data.error || 'Unknown error'}`,
          this.name,
        );
      }

      // Check if we have data
      if (!data.data) {
        throw new ProviderError(
          ErrorType.PROVIDER_ERROR,
          'No data returned from API',
          this.name,
        );
      }

      // Prefer markdown, fallback to HTML, then rawHtml. `||` is
      // deliberate: an empty string counts as "no content".
      const content =
        data.data.markdown || data.data.html || data.data.rawHtml || '';

      // Check if content was successfully extracted
      if (!content) {
        throw new ProviderError(
          ErrorType.PROVIDER_ERROR,
          'No content extracted from URL',
          this.name,
        );
      }

      return {
        url: single_url,
        content,
        metadata: data.data.metadata,
        success: true,
      };
    } catch (error) {
      // Log the error but continue processing other URLs
      console.error(`Error processing ${single_url}:`, error);
      return {
        url: single_url,
        content: '',
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error',
      };
    }
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/spences10/mcp-omnisearch'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.