// Research Agent - Core functionality for objective-driven research
// Local modules
import { executeClaudeCLI } from './claude-cli.js';
import { loadPersistentBlockedDomains, extractDomainFromUrl, addToPersistentBlockedDomains, classifyFetchError } from './domain-blocking.js';
import { enhancedContentExtraction, extractLinksWithPriority } from './helpers.js';
import { logger } from './logger.js';
import { ResearchAgentParams, ResearchAgentResult, ResearchState, ResearchObjective, ExtractedQuote, ObjectiveStatus, WebSearchParams, WebFetchParams, CombinedAnalysisResult } from './types.js';
// Research Agent - Combined Content Analysis and Query Planning using Claude CLI
export async function analyzeContentAndPlan(content: string, objectives: ResearchObjective[], sourceUrl: string, incompleteObjectives: ResearchObjective[]): Promise<CombinedAnalysisResult> {
  try {
    const objectivesList = objectives.map((obj, i) => `${i + 1}. ${obj.question}`).join('\n');
    const incompleteList = incompleteObjectives.map((obj, i) => `${obj.id}. ${obj.question}`).join('\n');
    const combinedPrompt = `You are a research assistant that extracts comprehensive information and plans next research steps. Analyze the provided content and return a JSON response with TWO parts:
1. EXTRACT QUOTES: Find multiple comprehensive quotes that capture all relevant information from this webpage
2. PLAN NEXT QUERY: Generate an intelligent next search query based on research gaps
Research Objectives:
${objectivesList}
Incomplete Objectives (need more research):
${incompleteList}
Content Source: ${sourceUrl}
Format your response as this exact JSON structure:
{
  "quotes": [
    {
      "quote": "comprehensive summary of all relevant information from this webpage",
      "objective_ids": ["1", "2"]
    }
  ],
  "next_query": "intelligent search query focusing on biggest research gaps"
}
QUOTE EXTRACTION GUIDELINES:
- Extract multiple comprehensive quotes that summarize ALL relevant information from the webpage
- Include all key facts, features, capabilities, and details that answer the research objectives
- Combine multiple pieces of information into a single comprehensive summary
- Use empty array if no relevant information found
NEXT QUERY GUIDELINES:
- Focus on objectives that need more information
- Be specific and targeted, not generic
- Include relevant keywords and context
- Consider what information is still missing
Content:
${content.slice(0, 8000)}`;
    // Use Claude CLI with a normal prompt
    const result = await executeClaudeCLI({ prompt: combinedPrompt });
    if (!result.results) {
      return { extracted_quotes: [], next_query: "" };
    }
    // Parse the JSON response
    const responseText = result.results.toString();
    const jsonMatch = responseText.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      logger.error('No JSON object found in Claude response', 'analyzeContentAndPlan');
      return { extracted_quotes: [], next_query: "" };
    }
    const parsedResult = JSON.parse(jsonMatch[0]);
    // Convert quotes to ExtractedQuote format
    const extractedQuotes: ExtractedQuote[] = (parsedResult.quotes || []).map((q: any) => ({
      quote: q.quote,
      source_url: sourceUrl,
      objective_ids: q.objective_ids || [],
      timestamp: new Date().toISOString()
    }));
    return {
      extracted_quotes: extractedQuotes,
      next_query: parsedResult.next_query || ""
    };
  } catch (error) {
    logger.error('Error in combined content analysis and planning', 'analyzeContentAndPlan', error instanceof Error ? error : new Error(String(error)));
    return { extracted_quotes: [], next_query: "" };
  }
}
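// Usage sketch (objectives, URL, and pageText are illustrative assumptions, not part
// of this module). Shows the result shape when Claude returns well-formed JSON:
//
//   const objectives: ResearchObjective[] = [
//     { id: '1', question: 'What deployment options does the product support?', completed: false },
//     { id: '2', question: 'What is the pricing model?', completed: false }
//   ];
//   const analysis = await analyzeContentAndPlan(pageText, objectives, 'https://example.com/docs', objectives);
//   // analysis.extracted_quotes -> [{ quote: '...', source_url: 'https://example.com/docs', objective_ids: ['1'], timestamp: '...' }]
//   // analysis.next_query       -> 'product pricing tiers comparison'
//   // On CLI failure or unparseable output the promise resolves with { extracted_quotes: [], next_query: "" }.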
// Research State Management Functions
export function initializeResearchState(objectives: string[], startingQuery: string): ResearchState {
  const researchObjectives: ResearchObjective[] = objectives.map((q, i) => ({
    id: (i + 1).toString(),
    question: q,
    completed: false
  }));
  const objectiveStatus: ObjectiveStatus[] = researchObjectives.map(obj => ({
    objective_id: obj.id,
    completed: false,
    supporting_quotes: []
  }));
  // Seed the failed-domain set with persistently blocked domains; runtime failures are added during research
  const persistentBlocked = loadPersistentBlockedDomains();
  const initialFailedDomains = new Set<string>(persistentBlocked);
  return {
    objectives: researchObjectives,
    accumulated_quotes: [],
    objective_status: objectiveStatus,
    iteration_count: 0,
    source_tracker: new Set<string>(),
    failed_domains: initialFailedDomains,
    last_query: startingQuery
  };
}
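// Usage sketch (objective texts and query are illustrative assumptions):
//
//   const state = initializeResearchState(
//     ['Which regions is the service available in?', 'Does it offer an SLA?'],
//     'service availability regions SLA'
//   );
//   // state.objectives      -> [{ id: '1', question: '...', completed: false }, { id: '2', ... }]
//   // state.failed_domains  -> pre-seeded from the persistent blocklist
//   // state.iteration_count -> 0, state.accumulated_quotes -> []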
export function saveExtractedQuotes(state: ResearchState, newQuotes: ExtractedQuote[]): void {
  // Add new quotes to accumulated quotes
  state.accumulated_quotes.push(...newQuotes);
  // Update objective status with new quotes
  for (const quote of newQuotes) {
    for (const objectiveId of quote.objective_ids) {
      const status = state.objective_status.find(s => s.objective_id === objectiveId);
      if (status) {
        status.supporting_quotes.push(quote);
        // Mark as completed if we have multiple quotes
        status.completed = status.supporting_quotes.length >= 2;
      }
    }
  }
  // Update research objectives completion status
  for (const objective of state.objectives) {
    const status = state.objective_status.find(s => s.objective_id === objective.id);
    if (status) {
      objective.completed = status.completed;
    }
  }
}
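// Behaviour sketch (hypothetical quote): a quote tagged with objective_ids ['1']
// is appended to objective 1's supporting_quotes; the objective is only marked
// completed once it has accumulated at least two supporting quotes.
//
//   saveExtractedQuotes(state, [{
//     quote: 'The service is available in 12 regions.',
//     source_url: 'https://example.com/regions',
//     objective_ids: ['1'],
//     timestamp: new Date().toISOString()
//   }]);
//   // state.objective_status[0].supporting_quotes.length -> 1, completed -> false (needs >= 2)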
export function assessObjectiveCompletion(state: ResearchState): { allComplete: boolean; completionRate: number; incompleteObjectives: ResearchObjective[] } {
  const completed = state.objectives.filter(obj => obj.completed);
  const incomplete = state.objectives.filter(obj => !obj.completed);
  return {
    allComplete: completed.length === state.objectives.length,
    completionRate: completed.length / state.objectives.length,
    incompleteObjectives: incomplete
  };
}
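// Result sketch (hypothetical state with one of two objectives complete):
//
//   assessObjectiveCompletion(state);
//   // -> { allComplete: false, completionRate: 0.5, incompleteObjectives: [/* objective 2 */] }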
export function deduplicateQuotes(quotes: ExtractedQuote[]): ExtractedQuote[] {
  const seen = new Set<string>();
  return quotes.filter(quote => {
    const key = `${quote.quote.slice(0, 100)}-${quote.source_url}`;
    if (seen.has(key)) {
      return false;
    }
    seen.add(key);
    return true;
  });
}
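// Dedup sketch (hypothetical values): quotes count as duplicates when the first
// 100 characters of the quote text and the source URL both match, so repeated
// extractions of the same passage from one page collapse to a single entry.
//
//   const unique = deduplicateQuotes([
//     { quote: 'Same passage of text', source_url: 'https://example.com/a', objective_ids: ['1'], timestamp: '2024-01-01T00:00:00Z' },
//     { quote: 'Same passage of text', source_url: 'https://example.com/a', objective_ids: ['2'], timestamp: '2024-01-01T00:05:00Z' }
//   ]);
//   // unique.length -> 1 (the later duplicate, including its objective_ids, is discarded)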
// Main Research Agent Implementation
export async function executeResearchAgent(params: ResearchAgentParams): Promise<ResearchAgentResult> {
  const maxCalls = params.max_calls || 5;
  const state = initializeResearchState(params.objectives, params.starting_query);
  let currentQuery = params.starting_query;
  let terminationReason: 'all_objectives_complete' | 'max_calls_reached' | 'no_new_information' = 'max_calls_reached';
  for (let iteration = 1; iteration <= maxCalls; iteration++) {
    state.iteration_count = iteration;
    try {
      // Step 1: Web search with updated blocked domains
      const combinedBlockedDomains = [
        ...(params.blocked_domains || []),
        ...Array.from(state.failed_domains)
      ];
      const searchParams: WebSearchParams = {
        query: currentQuery,
        allowed_domains: params.allowed_domains,
        blocked_domains: combinedBlockedDomains,
        auto_research: false
      };
      const searchParams2: any = { query: searchParams.query };
      if (searchParams.allowed_domains) {
        searchParams2.allowed_domains = searchParams.allowed_domains;
      }
      if (searchParams.blocked_domains) {
        searchParams2.blocked_domains = searchParams.blocked_domains;
      }
      const searchResult = await executeClaudeCLI({
        toolName: "web_search",
        allowedTools: "web_search",
        parameters: searchParams2
      });
      // Step 2: Auto-fetch top URLs
      const searchContent = searchResult.results || '';
      const priorityUrls = extractLinksWithPriority(searchContent, currentQuery, 3);
      if (priorityUrls.length === 0) {
        continue;
      }
      // Step 3: Parallel URL fetching and processing
      const fetchPromises = priorityUrls.map(async ({ url, score }) => {
        // Skip if we've already processed this source
        if (state.source_tracker.has(url)) {
          return null;
        }
        // Check if domain is in failed domains
        const domain = extractDomainFromUrl(url);
        if (domain && state.failed_domains.has(domain)) {
          return null;
        }
        try {
          const fetchParams: WebFetchParams = { url };
          // Use individual 120s timeout to match Claude CLI timeout
          const fetchResult = await Promise.race([
            executeClaudeCLI({
              toolName: "web_fetch",
              allowedTools: "WebFetch",
              parameters: { url: fetchParams.url }
            }),
            new Promise<never>((_, reject) =>
              setTimeout(() => reject(new Error('Individual fetch timeout')), 120000)
            )
          ]);
          if (fetchResult.results && typeof fetchResult.results === 'string') {
            const extracted = enhancedContentExtraction(fetchResult.results, url);
            // Only process if we got substantial content
            if (extracted.content.length > 500) {
              // Mark source as processed
              state.source_tracker.add(url);
              return {
                url,
                extracted,
                domain
              };
            } else {
              // Short content - just continue research, don't block domain
              logger.info(`Short content from ${domain}, continuing research`, 'executeResearchAgent');
              return null;
            }
          } else {
            // No fetch results - just continue research, don't block domain
            logger.info(`No results from ${domain}, continuing research`, 'executeResearchAgent');
            return null;
          }
        } catch (fetchError) {
          // Smart error classification for domain blocking
          if (domain && fetchError instanceof Error) {
            const classification = classifyFetchError(fetchError);
            switch (classification.type) {
              case 'permanent_block':
                // Only block permanently for explicit access restrictions (403/401/429)
                addToPersistentBlockedDomains(domain);
                logger.info(`Permanently blocked ${domain}: ${classification.reason}`, 'executeResearchAgent');
                break;
              case 'session_block':
                // Block for this session only for network issues
                state.failed_domains.add(domain);
                logger.info(`Session blocked ${domain}: ${classification.reason}`, 'executeResearchAgent');
                break;
              case 'continue':
                // Don't block domain, just continue research
                logger.info(`Continuing research despite error from ${domain}: ${classification.reason}`, 'executeResearchAgent');
                break;
            }
          }
          return null;
        }
      });
      // Wait for all fetches to complete
      const fetchResults = await Promise.allSettled(fetchPromises);
      const successfulFetches = fetchResults
        .filter(result => result.status === 'fulfilled' && result.value !== null)
        .map(result => (result as PromiseFulfilledResult<any>).value);
      // Process content analysis for successful fetches in parallel
      if (successfulFetches.length > 0) {
        const completion = assessObjectiveCompletion(state);
        const analysisPromises = successfulFetches.map(async ({ url, extracted }) => {
          try {
            const analysisResult = await analyzeContentAndPlan(
              extracted.content,
              state.objectives,
              url,
              completion.incompleteObjectives
            );
            return analysisResult;
          } catch (error) {
            logger.error(`Error analyzing content from ${url}`, 'executeResearchAgent', error instanceof Error ? error : new Error(String(error)));
            return { extracted_quotes: [], next_query: "" };
          }
        });
        const analysisResults = await Promise.allSettled(analysisPromises);
        // Collect quotes and next query suggestions
        let bestNextQuery = "";
        for (const result of analysisResults) {
          if (result.status === 'fulfilled') {
            const analysis = result.value;
            if (analysis.extracted_quotes.length > 0) {
              saveExtractedQuotes(state, analysis.extracted_quotes);
            }
            // Use the first non-empty next query suggestion
            if (analysis.next_query && !bestNextQuery) {
              bestNextQuery = analysis.next_query;
            }
          }
        }
        // Store the best next query suggestion
        if (bestNextQuery) {
          state.last_query = bestNextQuery;
        }
      }
      // Step 4: Assess completion
      const completion = assessObjectiveCompletion(state);
      if (completion.allComplete) {
        terminationReason = 'all_objectives_complete';
        break;
      }
      // Step 5: Use the intelligent query from combined analysis
      if (iteration < maxCalls && state.last_query) {
        currentQuery = state.last_query;
      }
    } catch (error) {
      // Log and continue with the next iteration rather than silently swallowing the error
      logger.error(`Error during research iteration ${iteration}`, 'executeResearchAgent', error instanceof Error ? error : new Error(String(error)));
    }
  }
  // Deduplicate quotes before final result
  state.accumulated_quotes = deduplicateQuotes(state.accumulated_quotes);
  // Generate final summary
  const finalCompletion = assessObjectiveCompletion(state);
  const totalQuotes = state.accumulated_quotes.length;
  const sourcesCount = state.source_tracker.size;
  const finalSummary = `Research completed after ${state.iteration_count} iterations. ` +
    `Objectives completed: ${Math.round(finalCompletion.completionRate * 100)}% (${state.objectives.filter(o => o.completed).length}/${state.objectives.length}). ` +
    `Total quotes extracted: ${totalQuotes} from ${sourcesCount} sources.`;
  return {
    completed_objectives: state.objective_status,
    all_quotes: state.accumulated_quotes,
    iteration_count: state.iteration_count,
    termination_reason: terminationReason,
    final_summary: finalSummary
  };
}
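// End-to-end usage sketch (objectives, query, and blocked domain are illustrative
// assumptions; max_calls caps the number of search -> fetch -> analyze iterations):
//
//   const result = await executeResearchAgent({
//     objectives: [
//       'What deployment options does the product support?',
//       'What compliance certifications does it hold?'
//     ],
//     starting_query: 'product deployment options compliance certifications',
//     max_calls: 3,
//     blocked_domains: ['example-spam-site.com']
//   });
//   // result.termination_reason -> e.g. 'all_objectives_complete' or 'max_calls_reached'
//   console.log(formatResearchAgentResults(result));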
// Format Research Agent Results
export function formatResearchAgentResults(result: ResearchAgentResult): string {
  const sections: string[] = [];
  // Header with summary
  sections.push(`# Research Agent Results\n`);
  sections.push(`**Final Summary:** ${result.final_summary}\n`);
  sections.push(`**Termination Reason:** ${result.termination_reason.replace(/_/g, ' ')}`);
  sections.push(`**Iterations Completed:** ${result.iteration_count}\n`);
  // Objectives Status
  sections.push(`## Objectives Status\n`);
  result.completed_objectives.forEach((obj, i) => {
    const statusIcon = obj.completed ? '✅' : '❌';
    sections.push(`${statusIcon} **Objective ${obj.objective_id}**: (${obj.supporting_quotes.length} quotes)`);
  });
  sections.push('');
  // All Extracted Quotes
  if (result.all_quotes.length > 0) {
    sections.push(`## Extracted Quotes (${result.all_quotes.length} total)\n`);
    // Group quotes by objective
    const quotesByObjective = new Map<string, ExtractedQuote[]>();
    result.all_quotes.forEach(quote => {
      quote.objective_ids.forEach(objId => {
        if (!quotesByObjective.has(objId)) {
          quotesByObjective.set(objId, []);
        }
        quotesByObjective.get(objId)!.push(quote);
      });
    });
    // Display quotes organized by objective
    quotesByObjective.forEach((quotes, objId) => {
      sections.push(`### Objective ${objId} Quotes\n`);
      quotes.forEach((quote, i) => {
        sections.push(`**Quote ${i + 1}**`);
        sections.push(`Source: ${quote.source_url}`);
        sections.push(`> "${quote.quote}"`);
        sections.push('');
      });
    });
    // Also show any quotes that don't match specific objectives
    const unmatchedQuotes = result.all_quotes.filter(q => q.objective_ids.length === 0);
    if (unmatchedQuotes.length > 0) {
      sections.push(`### Additional Quotes\n`);
      unmatchedQuotes.forEach((quote, i) => {
        sections.push(`**Quote ${i + 1}**`);
        sections.push(`Source: ${quote.source_url}`);
        sections.push(`> "${quote.quote}"`);
        sections.push('');
      });
    }
  } else {
    sections.push(`## No Quotes Extracted\n`);
    sections.push('No relevant quotes were found during the research process.');
  }
  return sections.join('\n');
}
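// Output sketch: the formatter returns a markdown report roughly of the form below
// (objective ids, counts, and URLs are illustrative):
//
//   # Research Agent Results
//   **Final Summary:** Research completed after 3 iterations. ...
//   **Termination Reason:** all objectives complete
//   **Iterations Completed:** 3
//   ## Objectives Status
//   ✅ **Objective 1**: (2 quotes)
//   ## Extracted Quotes (2 total)
//   ### Objective 1 Quotes
//   **Quote 1**
//   Source: https://example.com/docs
//   > "..."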