Skip to main content
Glama

Open Search MCP

by flyanima
MIT License
2
  • Apple
  • Linux
saturated-search-manager.ts15 kB
/** * Saturated Search Manager * Manages concurrent searches across 200+ search sources, implementing true web-wide saturated search */ import { Logger } from '../utils/logger.js'; import { COMPREHENSIVE_SEARCH_SOURCES, SearchSourceConfig, SearchSourceCategory, getAllActiveSources, getSourcesByDomain, getHighPrioritySources } from '../config/comprehensive-search-sources.js'; import { ToolRegistry } from '../tools/tool-registry.js'; // import { PerformanceMonitor } from '../utils/performance-monitor.js'; // Simplified export interface SaturatedSearchQuery { topic: string; domains?: string[]; languages?: string[]; maxConcurrentRequests?: number; priorityThreshold?: number; includeAuthRequired?: boolean; timeoutMs?: number; } export interface SaturatedSearchResult { sourceId: string; sourceName: string; results: any[]; success: boolean; error?: string; responseTime: number; resultCount: number; } export interface SaturatedSearchSummary { totalSources: number; successfulSources: number; failedSources: number; totalResults: number; averageResponseTime: number; searchDuration: number; sourcesUsed: string[]; errors: Array<{sourceId: string, error: string}>; } /** * 饱和搜索管理器 * 实现200+搜索源的并发搜索 */ export class SaturatedSearchManager { private logger: Logger; private toolRegistry: ToolRegistry; // private performanceMonitor: PerformanceMonitor; // Simplified private activeRequests: Map<string, Promise<SaturatedSearchResult>> = new Map(); constructor(toolRegistry: ToolRegistry) { this.logger = new Logger('SaturatedSearchManager'); this.toolRegistry = toolRegistry; // this.performanceMonitor = new PerformanceMonitor(); // Simplified } /** * 执行饱和搜索 - 200+搜索源并发搜索 */ async executeSaturatedSearch(query: SaturatedSearchQuery): Promise<{ results: SaturatedSearchResult[]; summary: SaturatedSearchSummary; }> { const startTime = Date.now(); this.logger.info(`Starting saturated search for: "${query.topic}"`); // 1. 选择搜索源 const selectedSources = this.selectSearchSources(query); this.logger.info(`Selected ${selectedSources.length} search sources for saturated search`); // 2. 创建并发搜索任务 const searchTasks = this.createSearchTasks(selectedSources, query); // 3. 执行并发搜索(带速率限制和错误处理) const results = await this.executeConcurrentSearches(searchTasks, query); // 4. 生成搜索摘要 const summary = this.generateSearchSummary(results, selectedSources, startTime); this.logger.info(`Saturated search completed: ${summary.successfulSources}/${summary.totalSources} sources successful, ${summary.totalResults} total results`); return { results, summary }; } /** * 选择搜索源 */ private selectSearchSources(query: SaturatedSearchQuery): SearchSourceConfig[] { let sources: SearchSourceConfig[] = []; // 获取所有活跃的搜索源 const allSources = getAllActiveSources(); // 根据领域过滤 if (query.domains && query.domains.length > 0) { const domainSources = new Set<SearchSourceConfig>(); for (const domain of query.domains) { const domainSpecificSources = getSourcesByDomain(domain); domainSpecificSources.forEach(source => domainSources.add(source)); } sources = Array.from(domainSources); } else { sources = allSources; } // 根据优先级过滤 if (query.priorityThreshold !== undefined) { sources = sources.filter(source => source.priority >= query.priorityThreshold!); } // 根据认证要求过滤 if (!query.includeAuthRequired) { sources = sources.filter(source => !source.requiresAuth); } // 根据语言过滤 if (query.languages && query.languages.length > 0) { sources = sources.filter(source => !source.languages || source.languages.some(lang => query.languages!.includes(lang)) ); } // 按优先级排序 sources.sort((a, b) => b.priority - a.priority); this.logger.debug(`Source selection: ${sources.length} sources selected from ${allSources.length} total sources`); return sources; } /** * 创建搜索任务 */ private createSearchTasks( sources: SearchSourceConfig[], query: SaturatedSearchQuery ): Array<{source: SearchSourceConfig, searchParams: any}> { return sources.map(source => ({ source, searchParams: { query: query.topic, maxResults: source.maxResults, // 添加源特定的参数 ...(source.domains && { domains: source.domains }), ...(source.languages && { languages: source.languages }) } })); } /** * 执行并发搜索 */ private async executeConcurrentSearches( tasks: Array<{source: SearchSourceConfig, searchParams: any}>, query: SaturatedSearchQuery ): Promise<SaturatedSearchResult[]> { const maxConcurrent = query.maxConcurrentRequests || 50; // 默认50个并发请求 const timeoutMs = query.timeoutMs || 30000; // 默认30秒超时 const results: SaturatedSearchResult[] = []; const semaphore = new Semaphore(maxConcurrent); // 创建所有搜索Promise const searchPromises = tasks.map(async (task) => { return semaphore.acquire().then(async (release) => { try { const result = await this.executeSearchTask(task, timeoutMs); return result; } finally { release(); } }); }); // 等待所有搜索完成 const settledResults = await Promise.allSettled(searchPromises); // 处理结果 settledResults.forEach((settledResult, index) => { if (settledResult.status === 'fulfilled') { results.push(settledResult.value); } else { // 处理Promise被拒绝的情况 const task = tasks[index]; results.push({ sourceId: task.source.id, sourceName: task.source.name, results: [], success: false, error: settledResult.reason?.message || 'Unknown error', responseTime: 0, resultCount: 0 }); } }); return results; } /** * 执行单个搜索任务 */ private async executeSearchTask( task: {source: SearchSourceConfig, searchParams: any}, timeoutMs: number ): Promise<SaturatedSearchResult> { const { source, searchParams } = task; const startTime = Date.now(); try { // 获取对应的搜索工具 const toolName = this.getToolNameForSource(source); const tool = this.toolRegistry.getTool(toolName); if (!tool) { throw new Error(`No tool found for source: ${source.id}`); } // 执行搜索(带超时) const searchPromise = tool.execute(searchParams); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error('Search timeout')), timeoutMs); }); const toolResult = await Promise.race([searchPromise, timeoutPromise]); const responseTime = Date.now() - startTime; // 标准化结果 const results = this.standardizeResults(toolResult, source); return { sourceId: source.id, sourceName: source.name, results: results, success: true, responseTime: responseTime, resultCount: results.length }; } catch (error) { const responseTime = Date.now() - startTime; this.logger.warn(`Search failed for ${source.id}: ${error}`); return { sourceId: source.id, sourceName: source.name, results: [], success: false, error: error instanceof Error ? error.message : String(error), responseTime: responseTime, resultCount: 0 }; } } /** * 根据搜索源获取对应的工具名称 * 为200+搜索源提供完整的工具映射 */ private getToolNameForSource(source: SearchSourceConfig): string { // 建立搜索源ID到实际MCP工具名称的完整映射 const toolMappings: Record<string, string> = { // 学术搜索工具 - 使用专门的paper search工具 'google_scholar': 'search_google_scholar_paper_search_server', 'pubmed_search': 'search_pubmed_paper_search_server', 'arxiv_search': 'search_arxiv_paper_search_server', 'semantic_scholar': 'search_semantic_paper_search_server', 'biorxiv_search': 'search_biorxiv_paper_search_server', 'medrxiv_search': 'search_medrxiv_paper_search_server', 'iacr_search': 'search_iacr_paper_search_server', // 其他学术期刊和数据库 - 使用研究论文搜索工具 'ieee_xplore': 'research_paper_search_exa_exa', 'acm_digital_library': 'research_paper_search_exa_exa', 'springer_link': 'research_paper_search_exa_exa', 'sciencedirect': 'research_paper_search_exa_exa', 'nature_journals': 'research_paper_search_exa_exa', 'science_magazine': 'research_paper_search_exa_exa', 'cell_press': 'research_paper_search_exa_exa', 'plos_journals': 'research_paper_search_exa_exa', 'frontiers_journals': 'research_paper_search_exa_exa', 'lancet_journals': 'research_paper_search_exa_exa', 'nejm_journal': 'research_paper_search_exa_exa', 'jama_network': 'research_paper_search_exa_exa', 'bmj_journals': 'research_paper_search_exa_exa', 'wiley_journals': 'research_paper_search_exa_exa', 'taylor_francis': 'research_paper_search_exa_exa', 'sage_journals': 'research_paper_search_exa_exa', 'cambridge_journals': 'research_paper_search_exa_exa', 'oxford_journals': 'research_paper_search_exa_exa', 'mdpi_journals': 'research_paper_search_exa_exa', 'microsoft_academic': 'research_paper_search_exa_exa', 'web_of_science': 'research_paper_search_exa_exa', 'scopus_search': 'research_paper_search_exa_exa', 'jstor_search': 'research_paper_search_exa_exa', 'proquest_search': 'research_paper_search_exa_exa', 'ebsco_search': 'research_paper_search_exa_exa', 'cochrane_library': 'research_paper_search_exa_exa', 'lens_org': 'research_paper_search_exa_exa', 'dimensions_ai': 'research_paper_search_exa_exa', 'crossref_search': 'research_paper_search_exa_exa', 'dblp_search': 'research_paper_search_exa_exa', 'citeseerx_search': 'research_paper_search_exa_exa', 'core_search': 'research_paper_search_exa_exa', 'base_search': 'research_paper_search_exa_exa', 'openaire_search': 'research_paper_search_exa_exa', // 预印本服务器 'engrxiv_search': 'research_paper_search_exa_exa', 'psyarxiv_search': 'research_paper_search_exa_exa', 'socarxiv_search': 'research_paper_search_exa_exa', 'chemrxiv_search': 'research_paper_search_exa_exa', 'techrxiv_search': 'research_paper_search_exa_exa', 'eartharxiv_search': 'research_paper_search_exa_exa', 'authorea_search': 'research_paper_search_exa_exa', 'figshare_search': 'research_paper_search_exa_exa', 'zenodo_search': 'research_paper_search_exa_exa', 'researchgate_search': 'research_paper_search_exa_exa', // 论文库 'ndltd_search': 'research_paper_search_exa_exa', 'dart_europe': 'research_paper_search_exa_exa', 'theses_canada': 'research_paper_search_exa_exa', 'ethos_bl': 'research_paper_search_exa_exa', 'oatd_search': 'research_paper_search_exa_exa', // 专门搜索工具 'linkedin_search': 'linkedin_search_exa_exa', 'wikipedia_search': 'wikipedia_search_exa_exa', 'github_search': 'github_search_exa_exa', 'company_research': 'company_research_exa_exa' }; // 如果没有找到特定映射,根据类别选择合适的工具 if (toolMappings[source.id]) { return toolMappings[source.id]; } // 根据搜索源类别选择默认工具 switch (source.category) { case 'academic_journals': case 'academic_databases': case 'preprint_servers': case 'thesis_repositories': return 'research_paper_search_exa_exa'; case 'professional_networks': return 'linkedin_search_exa_exa'; case 'encyclopedias': case 'reference_sites': return 'wikipedia_search_exa_exa'; case 'developer_guides': case 'api_docs': return 'github_search_exa_exa'; case 'company_databases': case 'market_research': case 'industry_reports': return 'company_research_exa_exa'; default: // 所有其他类型使用通用web搜索 return 'web_search_exa_exa'; } } /** * 标准化搜索结果 */ private standardizeResults(toolResult: any, source: SearchSourceConfig): any[] { // 根据不同的工具结果格式进行标准化 if (Array.isArray(toolResult)) { return toolResult; } else if (toolResult && Array.isArray(toolResult.results)) { return toolResult.results; } else if (toolResult && toolResult.data && Array.isArray(toolResult.data)) { return toolResult.data; } else { return []; } } /** * 生成搜索摘要 */ private generateSearchSummary( results: SaturatedSearchResult[], sources: SearchSourceConfig[], startTime: number ): SaturatedSearchSummary { const successfulResults = results.filter(r => r.success); const failedResults = results.filter(r => !r.success); const totalResults = successfulResults.reduce((sum, r) => sum + r.resultCount, 0); const averageResponseTime = results.length > 0 ? results.reduce((sum, r) => sum + r.responseTime, 0) / results.length : 0; return { totalSources: sources.length, successfulSources: successfulResults.length, failedSources: failedResults.length, totalResults: totalResults, averageResponseTime: averageResponseTime, searchDuration: Date.now() - startTime, sourcesUsed: sources.map(s => s.name), errors: failedResults.map(r => ({ sourceId: r.sourceId, error: r.error || 'Unknown error' })) }; } } /** * 信号量实现,用于控制并发数量 */ class Semaphore { private permits: number; private waitQueue: Array<() => void> = []; constructor(permits: number) { this.permits = permits; } async acquire(): Promise<() => void> { return new Promise((resolve) => { if (this.permits > 0) { this.permits--; resolve(() => this.release()); } else { this.waitQueue.push(() => { this.permits--; resolve(() => this.release()); }); } }); } private release(): void { this.permits++; if (this.waitQueue.length > 0) { const next = this.waitQueue.shift()!; next(); } } }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/flyanima/open-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server