Skip to main content
Glama

Analytical MCP Server

exa_research.ts (16.3 kB)
import { z } from 'zod';
import fetch from 'node-fetch';
import { executeApiRequest, RETRYABLE_STATUS_CODES } from './api_helpers.js';
import { APIError, ValidationError, DataProcessingError } from './errors.js';
import { Logger } from './logger.js';
import { config, isFeatureEnabled } from './config.js';
import { factExtractor } from './advanced_fact_extraction.js';
import { rateLimitManager } from './rate_limit_manager.js';
import { researchCache, ResearchCacheNamespace } from './research_cache.js';

// Exa client configuration schema
const ExaConfigSchema = z.object({
  apiKey: z.string().optional().describe('Exa API key for authentication'),
  baseUrl: z.string().default('https://api.exa.ai').describe('Base URL for Exa API'),
});

// Research query input schema
const ExaResearchQuerySchema = z.object({
  query: z.string().describe('Search query for research'),
  numResults: z.number().min(1).max(10).default(5).describe('Number of search results'),
  timeRangeMonths: z
    .number()
    .min(1)
    .max(36)
    .optional()
    .describe('Time range for results in months'),
  useWebResults: z.boolean().default(true).describe('Include web search results'),
  useNewsResults: z.boolean().default(false).describe('Include news results'),
  includeContents: z.boolean().default(true).describe('Include full content of search results'),
});

// Exa search result type
interface ExaSearchResult {
  title: string;
  url: string;
  publishedDate?: string;
  contents?: string;
  score?: number;
}

// Data validation options
interface DataValidationOptions {
  originalData: any[];
  context: string;
  fieldsToValidate?: string[];
}

/**
 * Research helper built on the Exa search API.
 *
 * Provides three operations: a cached, rate-limited search (`search`), fact
 * extraction over search results (`extractKeyFacts`), and a research-backed
 * "validation" pass that attaches context facts to a data set (`validateData`).
 */
class ExaResearchTool {
  private readonly apiKey: string;
  private readonly baseUrl: string;

  /**
   * @param exaConfig Optional overrides. When `apiKey` is omitted the value of
   *   `config.EXA_API_KEY` is used; when neither is set the key is the empty
   *   string and `search` will refuse to run.
   * @throws ValidationError when `exaConfig` does not match `ExaConfigSchema`.
   */
  constructor(exaConfig?: z.infer<typeof ExaConfigSchema>) {
    try {
      const parsedConfig = ExaConfigSchema.parse(exaConfig || {});

      // Use validated environment config from config.js
      this.apiKey = parsedConfig.apiKey || config.EXA_API_KEY || '';
      this.baseUrl = parsedConfig.baseUrl;

      if (!this.apiKey) {
        Logger.warn('No Exa API key provided. Research functionality will be limited.');
        return;
      }

      const researchEnabled = isFeatureEnabled('researchIntegration');
      Logger.debug('Exa research tool initialized', {
        baseUrl: this.baseUrl,
        researchEnabled,
      });

      if (!researchEnabled) {
        Logger.info('Research integration is disabled in configuration.');
      }

      // Register the API key with the rate limit manager. A key is guaranteed
      // to be present here because the keyless case returned above.
      rateLimitManager.registerApiKeys('exa', [this.apiKey]);

      // 10 requests per minute for search endpoint
      rateLimitManager.configureEndpoint('exa/search', 10, 60 * 1000);

      // 50 requests per hour for heavy operations
      rateLimitManager.configureEndpoint('exa/validate', 50, 60 * 60 * 1000);
    } catch (error) {
      if (error instanceof z.ZodError) {
        throw new ValidationError(
          'ERR_1001',
          `Invalid Exa configuration: ${error.message}`,
          { issues: error.issues }
        );
      }
      throw error;
    }
  }

  /**
   * Perform a search against `${baseUrl}/search`.
   *
   * Results are looked up in `researchCache` first; on a miss the request is
   * executed through `rateLimitManager` and the response is cached.
   *
   * @param query Search parameters; validated against `ExaResearchQuerySchema`.
   * @returns The parsed response body, which contains a `results` array.
   * @throws ValidationError when `query` fails schema validation.
   * @throws APIError when research integration is disabled, the API key is
   *   missing, the HTTP response is not OK, the response body has no `results`
   *   array, or any other failure occurs while executing the request.
   */
  async search(
    query: z.infer<typeof ExaResearchQuerySchema>
  ): Promise<{ results: ExaSearchResult[] }> {
    // Check if research integration is enabled
    if (!isFeatureEnabled('researchIntegration')) {
      Logger.warn(
        'Research integration is disabled. Enable it with ENABLE_RESEARCH_INTEGRATION=true'
      );
      throw new APIError(
        'ERR_1002',
        'Research integration is disabled in configuration'
      );
    }

    let parsedQuery: z.infer<typeof ExaResearchQuerySchema>;
    try {
      parsedQuery = ExaResearchQuerySchema.parse(query);
      Logger.debug(`Executing Exa search for: "${parsedQuery.query}"`, {
        numResults: parsedQuery.numResults,
        useWebResults: parsedQuery.useWebResults,
        useNewsResults: parsedQuery.useNewsResults,
      });
    } catch (error) {
      if (error instanceof z.ZodError) {
        Logger.error('Search query validation failed', error);
        throw new ValidationError(
          'ERR_1001',
          `Invalid search query: ${error.message}`,
          { issues: error.issues, query }
        );
      }
      throw error;
    }

    if (!this.apiKey) {
      Logger.error('Missing API key for Exa search');
      throw new APIError(
        'ERR_1002',
        'Cannot perform search: Missing API key'
      );
    }

    // Check cache first
    const cachedResults = researchCache.getSearchResults(parsedQuery.query, parsedQuery);
    if (cachedResults) {
      Logger.debug(`Cache hit for search query: "${parsedQuery.query}"`);
      return cachedResults;
    }

    try {
      // Route the request through the rate limit manager, which supplies the
      // API key to use for each attempt.
      const results = await rateLimitManager.executeRateLimitedRequest(
        async (apiKey) => {
          const response = await fetch(`${this.baseUrl}/search`, {
            method: 'POST',
            headers: {
              'Content-Type': 'application/json',
              Authorization: `Bearer ${apiKey}`,
            },
            body: JSON.stringify({
              query: parsedQuery.query,
              numResults: parsedQuery.numResults,
              timeRange: parsedQuery.timeRangeMonths
                ? `${parsedQuery.timeRangeMonths}m`
                : undefined,
              useWebResults: parsedQuery.useWebResults,
              useNewsResults: parsedQuery.useNewsResults,
              includeContents: parsedQuery.includeContents,
            }),
          });

          if (!response.ok) {
            throw new APIError(
              'ERR_1002',
              `Exa search failed: ${response.statusText}`
            );
          }

          const data = (await response.json()) as
            | { results: ExaSearchResult[] }
            | null
            | undefined;

          // Reject bodies without a results array up front so callers get a
          // descriptive APIError instead of a TypeError from `.length`, and so
          // a malformed payload is never cached.
          if (!data || !Array.isArray(data.results)) {
            throw new APIError(
              'ERR_1002',
              'Exa search failed: response did not contain a results array'
            );
          }

          Logger.debug(`Exa search returned ${data.results.length} results`);
          return data;
        },
        {
          provider: 'exa',
          endpoint: 'exa/search',
          maxRetries: 5,
          initialDelayMs: 1000,
          maxDelayMs: 30000,
          timeoutMs: 60000,
          useJitter: true,
          rotateKeysOnRateLimit: true,
        }
      );

      // Cache the results
      researchCache.setSearchResults(parsedQuery.query, parsedQuery, results);

      return results;
    } catch (error) {
      // Log and rethrow the error with better context
      Logger.error('Exa search operation failed', error, {
        query: parsedQuery.query,
        numResults: parsedQuery.numResults,
      });

      if (error instanceof APIError) {
        throw error; // Already properly formatted
      }

      throw new APIError(
        'ERR_1002',
        `Exa search failed: ${error instanceof Error ? error.message : 'Unknown error'}`
      );
    }
  }

  /**
   * Extract key facts from search results using `factExtractor`.
   *
   * Each result's `contents` (when present) and `title` are run through the
   * extractor; the combined facts are sorted by descending `score` and the
   * text of the top `maxFacts` is returned. Output is cached in
   * `researchCache`, keyed by the JSON serialization of `results` and
   * `maxFacts`. If anything in this path throws, the simpler sentence-based
   * fallback is used instead.
   *
   * @param results Search results to mine for facts.
   * @param maxFacts Maximum number of facts to return.
   * @returns Fact strings; an empty array when `results` is not an array.
   * @throws DataProcessingError when both the primary and the fallback
   *   extraction fail.
   */
  extractKeyFacts(results: ExaSearchResult[], maxFacts: number = 5): string[] {
    try {
      if (!results || !Array.isArray(results)) {
        Logger.warn('Invalid search results provided to extractKeyFacts', { results });
        return [];
      }

      // The serialized results double as the cache identifier
      const resultsStr = JSON.stringify(results);
      const cachedFacts = researchCache.getExtractedFacts(resultsStr, maxFacts);

      if (cachedFacts) {
        Logger.debug(`Cache hit for fact extraction, returning ${cachedFacts.length} cached facts`);
        return cachedFacts;
      }

      // Process all content first
      const allContent = results
        .filter((result) => result.contents)
        .map((result) => ({
          text: result.contents!,
          source: result.url,
          title: result.title,
        }));

      // Add all titles separately
      const allTitles = results.map((result) => ({
        text: result.title,
        source: result.url,
      }));

      // Content sources come first, titles after
      const allSources = [...allContent, ...allTitles];

      // Run the extractor over every source and tag each fact with its origin
      const extractedFacts = allSources.flatMap((source) =>
        factExtractor
          .extractFacts(source.text, {
            maxFacts: Math.ceil(maxFacts / 2), // Per-source cap; the global cap is applied below
            minLength: 40,
            maxLength: 200,
            requireVerbs: true,
            requireEntities: false,
            filterBoilerplate: true,
          })
          .map((fact) => ({
            ...fact,
            source: source.source,
          }))
      );

      // Sort by score (highest first) and take top facts
      const topFacts = extractedFacts
        .sort((a, b) => b.score - a.score)
        .slice(0, maxFacts)
        .map((fact) => fact.text);

      Logger.debug(
        `Extracted ${topFacts.length} high-quality facts using advanced fact extraction`
      );

      // Cache the extracted facts
      researchCache.setExtractedFacts(resultsStr, maxFacts, topFacts);

      return topFacts;
    } catch (error) {
      Logger.error('Error extracting facts from search results', error);

      // Fall back to basic extraction if advanced extraction fails
      try {
        Logger.warn('Falling back to basic fact extraction');
        return this.fallbackExtractKeyFacts(results, maxFacts);
      } catch (fallbackError) {
        // Record the secondary failure too; the thrown error reports the
        // primary cause.
        Logger.error('Fallback fact extraction failed', fallbackError);
        throw new DataProcessingError(
          'ERR_1001',
          `Failed to extract facts: ${error instanceof Error ? error.message : 'Unknown error'}`,
          { resultsCount: results?.length }
        );
      }
    }
  }

  /**
   * Fallback fact extraction (simplified method as backup).
   *
   * Takes up to 3 sentences from each result's contents and up to 1 from its
   * title, in result order, then truncates the combined list to `maxFacts`.
   */
  private fallbackExtractKeyFacts(results: ExaSearchResult[], maxFacts: number = 5): string[] {
    if (!results || !Array.isArray(results)) {
      return [];
    }

    return results
      .flatMap((result) => {
        const contentFacts = result.contents ? this.findFactsInText(result.contents, 3) : [];
        const titleFacts = this.findFactsInText(result.title, 1);
        return [...contentFacts, ...titleFacts];
      })
      .slice(0, maxFacts);
  }

  /**
   * Simple sentence-based fact extraction used by the fallback path.
   *
   * Splits `text` on `.`, `!` and `?`, keeps trimmed sentences longer than 30
   * characters that do not look like boilerplate, and returns the first
   * `maxFacts` of them.
   *
   * @returns Trimmed sentences; an empty array for empty or non-string input,
   *   or if processing throws.
   */
  private findFactsInText(text: string, maxFacts: number = 3): string[] {
    if (!text || typeof text !== 'string') {
      return [];
    }

    try {
      return text
        .split(/[.!?]/)
        .map((sentence) => sentence.trim())
        .filter((sentence) => {
          if (sentence.length <= 30) {
            return false;
          }

          // Lowercase once so every boilerplate check, including the
          // terms-of-service pattern, is case-insensitive.
          const lowered = sentence.toLowerCase();
          return (
            !lowered.includes('disclaimer') &&
            !lowered.includes('copyright') &&
            !lowered.includes('cookies') &&
            !/terms of (service|use)/.test(lowered)
          );
        })
        .slice(0, maxFacts);
    } catch (error) {
      Logger.warn('Error finding facts in text', { error, textLength: text.length });
      return [];
    }
  }

  /**
   * Validate or supplement data with research.
   *
   * Runs a 3-result search for `context`, extracts facts from each result's
   * contents, and returns them alongside a shallow copy of `originalData`.
   * The data itself is not modified or checked against the research; only the
   * context facts are produced. Successful results are cached in
   * `researchCache`.
   *
   * @param options `originalData` must be an array and `context` a non-blank
   *   string. `fieldsToValidate` is only logged.
   * @returns The data plus research context. When the research step fails with
   *   a ValidationError or APIError, the original data is returned with an
   *   empty context instead of throwing.
   * @throws ValidationError when `originalData` is not an array or `context`
   *   is empty.
   * @throws DataProcessingError for any other failure during research.
   */
  async validateData(
    options: DataValidationOptions
  ): Promise<{
    validatedData: any[];
    researchContext: string[];
  }> {
    const { originalData, context, fieldsToValidate } = options;

    Logger.debug(`Validating data with research context: ${context}`, {
      dataLength: originalData?.length,
      fieldsToValidate,
    });

    if (!originalData || !Array.isArray(originalData)) {
      Logger.warn('Invalid data provided for validation', { originalData });
      throw new ValidationError(
        'ERR_1001',
        'Invalid data format for validation: expected array',
        { providedType: typeof originalData }
      );
    }

    if (!context || typeof context !== 'string' || context.trim().length === 0) {
      Logger.warn('Invalid context provided for validation');
      throw new ValidationError(
        'ERR_1001',
        'Empty or invalid context for validation'
      );
    }

    // Check cache first
    const cachedResults = researchCache.getValidationResults(context, originalData);
    if (cachedResults) {
      Logger.debug(`Cache hit for validation with context: "${context}"`);
      return cachedResults;
    }

    const researchQuery = `Validate and provide context for: ${context}`;

    try {
      // Validation requests go through the rate limit manager under their own
      // endpoint budget ('exa/validate').
      const results = await rateLimitManager.executeRateLimitedRequest(
        async () => {
          // Perform targeted research for validation
          const searchResults = await this.search({
            query: researchQuery,
            numResults: 3,
            useWebResults: true,
            useNewsResults: false,
            includeContents: true,
          });

          // Collect fact texts from every result that has contents
          const researchContext: string[] = [];

          for (const result of searchResults.results) {
            if (result.contents) {
              const facts = factExtractor.extractFacts(result.contents, {
                maxFacts: 3,
                minLength: 40,
                maxLength: 250,
                requireVerbs: true,
                filterBoilerplate: true,
                contextQuery: context, // Passed to the extractor alongside the text
              });

              researchContext.push(...facts.map((fact) => fact.text));
            }
          }

          Logger.debug(
            `Validation research returned ${researchContext.length} context items using advanced extraction`
          );

          // No per-record validation is performed yet: the data is returned
          // as a shallow copy next to the research context.
          const validatedData = [...originalData];

          return {
            validatedData,
            researchContext,
          };
        },
        {
          provider: 'exa',
          endpoint: 'exa/validate',
          maxRetries: 3,
          initialDelayMs: 2000, // More conservative for validation requests
          maxDelayMs: 60000,
          timeoutMs: 90000, // Longer timeout for validation
          useJitter: true,
        }
      );

      // Cache the validation results
      researchCache.setValidationResults(context, originalData, results);

      return results;
    } catch (error) {
      Logger.error(`Data validation request failed for context: ${context}`, error);

      // For validation errors, return empty context but original data
      if (error instanceof ValidationError) {
        return {
          validatedData: originalData,
          researchContext: [],
        };
      }

      // For API errors, return original data with empty context
      if (error instanceof APIError) {
        Logger.warn(`API error during validation: ${error.message}`);
        return {
          validatedData: originalData,
          researchContext: [],
        };
      }

      // For other errors, wrap in a DataProcessingError
      throw new DataProcessingError(
        'ERR_1001',
        `Data validation failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
        { context, dataLength: originalData.length }
      );
    }
  }
}

// Export utility for use across tools
export const exaResearch = new ExaResearchTool();

/**
 * Optional: Registration function for MCP Server.
 *
 * Currently a placeholder: it only logs that the tool was registered and does
 * not touch `server`.
 *
 * @returns `true` on success, `false` if logging the registration throws.
 */
export function registerExaResearch(server: any) {
  try {
    // In a real implementation, this would register the tool with the server
    Logger.info('Exa Research tool registered');
    return true;
  } catch (error) {
    Logger.error('Failed to register Exa Research tool', error);
    return false;
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/quanticsoul4772/analytical-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.