Skip to main content
Glama
research-mcp.old.ts.bak (65.8 kB)
import { Server } from '@modelcontextprotocol/sdk/server/index.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js'; import { GoogleSearchService } from './services/google-search.service.js'; import { ContentExtractor } from './services/content-extractor.service.js'; import { ResearchEnhancer } from './services/research-enhancer.service.js'; import { EnhancedContentExtractor } from './services/enhanced-content-extractor.service.js'; import { BrowsingSessionService } from './services/browsing-session.service.js'; import { NavigationService } from './services/navigation.service.js'; import { MultiSourceSynthesizer } from './services/multi-source-synthesizer.service.js'; import { OutputFormat } from './types.js'; import { v4 as uuidv4 } from 'uuid'; class GoogleResearchServer { private server: Server; private searchService: GoogleSearchService; private contentExtractor: ContentExtractor; private enhancedContentExtractor: EnhancedContentExtractor; private researchEnhancer: ResearchEnhancer; private browsingSessionService: BrowsingSessionService; private navigationService: NavigationService; private multiSourceSynthesizer: MultiSourceSynthesizer; // Track active browsing sessions private activeSessions: Map<string, { id: string; topic?: string; lastActivity: Date }> = new Map(); constructor() { this.searchService = new GoogleSearchService(); this.contentExtractor = new ContentExtractor(); this.enhancedContentExtractor = new EnhancedContentExtractor(); this.researchEnhancer = new ResearchEnhancer(); // Initialize the services that depend on other services this.browsingSessionService = new BrowsingSessionService(this.contentExtractor, this.searchService); this.navigationService = new NavigationService(this.browsingSessionService, this.enhancedContentExtractor); this.multiSourceSynthesizer = new MultiSourceSynthesizer(this.researchEnhancer, 
this.enhancedContentExtractor); this.server = new Server( { name: 'google-research', version: '1.0.0' }, { capabilities: { tools: { google_search: { description: 'Search Google and return relevant results from the web. This tool finds web pages, articles, and information on specific topics using Google\'s search engine. Results include titles, snippets, and URLs that can be analyzed further using extract_webpage_content.', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Search query - be specific and use quotes for exact matches. For best results, use clear keywords and avoid very long queries.' }, num_results: { type: 'number', description: 'Number of results to return (default: 5, max: 10). Increase for broader coverage, decrease for faster response.' }, site: { type: 'string', description: 'Limit search results to a specific website domain (e.g., "wikipedia.org" or "nytimes.com").' }, language: { type: 'string', description: 'Filter results by language using ISO 639-1 codes (e.g., "en" for English, "es" for Spanish, "fr" for French).' }, dateRestrict: { type: 'string', description: 'Filter results by date using Google\'s date restriction format: "d[number]" for past days, "w[number]" for past weeks, "m[number]" for past months, or "y[number]" for past years. Example: "m6" for results from the past 6 months.' }, exactTerms: { type: 'string', description: 'Search for results that contain this exact phrase. This is equivalent to putting the terms in quotes in the search query.' }, resultType: { type: 'string', description: 'Specify the type of results to return. Options include "image" (or "images"), "news", and "video" (or "videos"). Default is general web results.' }, page: { type: 'number', description: 'Page number for paginated results (starts at 1). Use in combination with resultsPerPage to navigate through large result sets.' 
}, resultsPerPage: { type: 'number', description: 'Number of results to show per page (default: 5, max: 10). Controls how many results are returned for each page.' }, sort: { type: 'string', description: 'Sorting method for search results. Options: "relevance" (default) or "date" (most recent first).' } }, required: ['query'] } }, extract_webpage_content: { description: 'Extract and analyze content from a webpage, converting it to readable text. This tool fetches the main content while removing ads, navigation elements, and other clutter. Use it to get detailed information from specific pages found via google_search. Works with most common webpage formats including articles, blogs, and documentation.', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'Full URL of the webpage to extract content from (must start with http:// or https://). Ensure the URL is from a public webpage and not behind authentication.' }, format: { type: 'string', description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".' }, full_content: { type: 'boolean', description: 'Whether to return the full content of the webpage (true) or just a preview (false). Default is false.' } }, required: ['url'] } }, extract_multiple_webpages: { description: 'Extract and analyze content from multiple webpages in a single request. This tool is ideal for comparing information across different sources or gathering comprehensive information on a topic. Limited to 5 URLs per request to maintain performance.', inputSchema: { type: 'object', properties: { urls: { type: 'array', items: { type: 'string' }, description: 'Array of webpage URLs to extract content from. Each URL must be public and start with http:// or https://. Maximum 5 URLs per request.' }, format: { type: 'string', description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".' 
} }, required: ['urls'] } }, research_topic: { description: 'Deeply research a topic by searching for relevant information, extracting content from multiple sources, and organizing it into a comprehensive markdown document. This tool helps develop a thorough understanding of complex or unfamiliar topics.', inputSchema: { type: 'object', properties: { topic: { type: 'string', description: 'The topic to research. Be specific to get the most relevant results.' }, depth: { type: 'string', description: 'The level of depth for the research: "basic" (overview), "intermediate" (detailed), or "advanced" (comprehensive). Default is "intermediate".' }, focus_areas: { type: 'array', items: { type: 'string' }, description: 'Specific aspects of the topic to focus on. For example, for "quantum computing" you might specify ["applications", "limitations", "recent advances"].' }, num_sources: { type: 'number', description: 'Maximum number of sources to include in the research (default: 5, max: 10).' } }, required: ['topic'] } }, synthesize_content: { description: 'Synthesize content from multiple webpages into a cohesive, structured document. This tool extracts relevant information from multiple sources, identifies common themes and contradictions, and organizes the information into a well-structured format.', inputSchema: { type: 'object', properties: { urls: { type: 'array', items: { type: 'string' }, description: 'Array of webpage URLs to extract and synthesize content from. Each URL must be public and start with http:// or https://.' }, focus: { type: 'string', description: 'A specific aspect or question to focus on when synthesizing the content. This helps filter out irrelevant information.' }, structure: { type: 'string', description: 'The structure to use for the synthesized content: "chronological", "thematic", "compare_contrast", or "question_answer". Default is "thematic".' 
} }, required: ['urls'] } }, summarize_webpage: { description: 'Generate a comprehensive summary of a webpage\'s content. This tool extracts the key points, main ideas, and essential information from a webpage, condensing it into a concise summary.', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'Full URL of the webpage to summarize (must start with http:// or https://). Ensure the URL is from a public webpage and not behind authentication.' }, length: { type: 'string', description: 'Desired length of the summary: "short" (250 words), "medium" (500 words), or "long" (1000 words). Default is "medium".' }, focus: { type: 'string', description: 'Optional specific aspect to focus on when summarizing the content.' } }, required: ['url'] } }, contextual_navigation: { description: 'Navigate the web contextually by following relevant links from a starting page. This tool simulates how a human browses by identifying and exploring related content, maintaining context between pages.', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'Starting URL for the navigation session (must start with http:// or https://)' }, keywords: { type: 'array', items: { type: 'string' }, description: 'Keywords to guide link relevance assessment. Links containing or related to these keywords will be prioritized.' }, depth: { type: 'number', description: 'Maximum navigation depth (1-3). Higher values explore more links but take longer. Default is 1.' }, max_links: { type: 'number', description: 'Maximum number of links to follow per page (1-5). Default is 3.' }, session_id: { type: 'string', description: 'Optional session ID to continue a previous browsing session. If not provided, a new session will be created.' }, stay_on_domain: { type: 'boolean', description: 'Whether to stay on the same domain as the starting URL. Default is false.' 
} }, required: ['url'] } }, structured_content_extraction: { description: 'Extract content from a webpage with enhanced structure preservation, maintaining tables, lists, hierarchies, and image context. This tool provides a richer representation of webpage content than standard extraction.', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'Full URL of the webpage to extract content from (must start with http:// or https://)' }, format: { type: 'string', description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".' }, preserve_tables: { type: 'boolean', description: 'Whether to preserve table structure in the output. Default is true.' }, extract_images: { type: 'boolean', description: 'Whether to extract images with context information. Default is true.' }, analyze_links: { type: 'boolean', description: 'Whether to analyze and include links with context. Default is true.' } }, required: ['url'] } }, enhanced_synthesis: { description: 'Create an enhanced synthesis of content from multiple sources with advanced capabilities like contradiction detection and source credibility assessment. This tool provides a more nuanced and comprehensive synthesis than standard approaches.', inputSchema: { type: 'object', properties: { urls: { type: 'array', items: { type: 'string' }, description: 'Array of webpage URLs to extract and synthesize content from. Each URL must be public and start with http:// or https://.' }, focus: { type: 'string', description: 'A specific aspect or question to focus on when synthesizing the content.' }, structure: { type: 'string', description: 'The structure to use for the synthesized content: "chronological", "thematic", "compare_contrast", or "question_answer". Default is "thematic".' }, detect_contradictions: { type: 'boolean', description: 'Whether to detect and highlight contradictions between sources. Default is true.' 
}, assess_credibility: { type: 'boolean', description: 'Whether to include source credibility assessment. Default is true.' }, compare_by: { type: 'array', items: { type: 'string' }, description: 'Aspects to compare when using compare_contrast structure. For example: ["methodology", "results", "limitations"].' } }, required: ['urls'] } } } }); // Register tool list handler this.server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: [ { name: 'google_search', description: 'Search Google and return relevant results from the web. This tool finds web pages, articles, and information on specific topics using Google\'s search engine. Results include titles, snippets, and URLs that can be analyzed further using extract_webpage_content.', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Search query - be specific and use quotes for exact matches. For best results, use clear keywords and avoid very long queries.' }, num_results: { type: 'number', description: 'Number of results to return (default: 5, max: 10). Increase for broader coverage, decrease for faster response.' }, site: { type: 'string', description: 'Limit search results to a specific website domain (e.g., "wikipedia.org" or "nytimes.com").' }, language: { type: 'string', description: 'Filter results by language using ISO 639-1 codes (e.g., "en" for English, "es" for Spanish, "fr" for French).' }, dateRestrict: { type: 'string', description: 'Filter results by date using Google\'s date restriction format: "d[number]" for past days, "w[number]" for past weeks, "m[number]" for past months, or "y[number]" for past years. Example: "m6" for results from the past 6 months.' }, exactTerms: { type: 'string', description: 'Search for results that contain this exact phrase. This is equivalent to putting the terms in quotes in the search query.' }, resultType: { type: 'string', description: 'Specify the type of results to return. 
Options include "image" (or "images"), "news", and "video" (or "videos"). Default is general web results.' }, page: { type: 'number', description: 'Page number for paginated results (starts at 1). Use in combination with resultsPerPage to navigate through large result sets.' }, resultsPerPage: { type: 'number', description: 'Number of results to show per page (default: 5, max: 10). Controls how many results are returned for each page.' }, sort: { type: 'string', description: 'Sorting method for search results. Options: "relevance" (default) or "date" (most recent first).' } }, required: ['query'] } }, { name: 'extract_webpage_content', description: 'Extract and analyze content from a webpage, converting it to readable text. This tool fetches the main content while removing ads, navigation elements, and other clutter. Use it to get detailed information from specific pages found via google_search. Works with most common webpage formats including articles, blogs, and documentation.', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'Full URL of the webpage to extract content from (must start with http:// or https://). Ensure the URL is from a public webpage and not behind authentication.' }, format: { type: 'string', description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".' }, full_content: { type: 'boolean', description: 'Whether to return the full content of the webpage (true) or just a preview (false). Default is false.' } }, required: ['url'] } }, { name: 'research_topic', description: 'Deeply research a topic by searching for relevant information, extracting content from multiple sources, and organizing it into a comprehensive markdown document. This tool helps develop a thorough understanding of complex or unfamiliar topics.', inputSchema: { type: 'object', properties: { topic: { type: 'string', description: 'The topic to research. Be specific to get the most relevant results.' 
}, depth: { type: 'string', description: 'The level of depth for the research: "basic" (overview), "intermediate" (detailed), or "advanced" (comprehensive). Default is "intermediate".' }, focus_areas: { type: 'array', items: { type: 'string' }, description: 'Specific aspects of the topic to focus on. For example, for "quantum computing" you might specify ["applications", "limitations", "recent advances"].' }, num_sources: { type: 'number', description: 'Maximum number of sources to include in the research (default: 5, max: 10).' } }, required: ['topic'] } }, { name: 'synthesize_content', description: 'Synthesize content from multiple webpages into a cohesive, structured document. This tool extracts relevant information from multiple sources, identifies common themes and contradictions, and organizes the information into a well-structured format.', inputSchema: { type: 'object', properties: { urls: { type: 'array', items: { type: 'string' }, description: 'Array of webpage URLs to extract and synthesize content from. Each URL must be public and start with http:// or https://.' }, focus: { type: 'string', description: 'A specific aspect or question to focus on when synthesizing the content. This helps filter out irrelevant information.' }, structure: { type: 'string', description: 'The structure to use for the synthesized content: "chronological", "thematic", "compare_contrast", or "question_answer". Default is "thematic".' } }, required: ['urls'] } }, { name: 'extract_multiple_webpages', description: 'Extract and analyze content from multiple webpages in a single request. This tool is ideal for comparing information across different sources or gathering comprehensive information on a topic. Limited to 5 URLs per request to maintain performance.', inputSchema: { type: 'object', properties: { urls: { type: 'array', items: { type: 'string' }, description: 'Array of webpage URLs to extract content from. Each URL must be public and start with http:// or https://. 
Maximum 5 URLs per request.' }, format: { type: 'string', description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".' } }, required: ['urls'] } }, { name: 'summarize_webpage', description: 'Generate a comprehensive summary of a webpage\'s content. This tool extracts the key points, main ideas, and essential information from a webpage, condensing it into a concise summary.', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'Full URL of the webpage to summarize (must start with http:// or https://). Ensure the URL is from a public webpage and not behind authentication.' }, length: { type: 'string', description: 'Desired length of the summary: "short" (250 words), "medium" (500 words), or "long" (1000 words). Default is "medium".' }, focus: { type: 'string', description: 'Optional specific aspect to focus on when summarizing the content.' } }, required: ['url'] } }, { name: 'contextual_navigation', description: 'Navigate the web contextually by following relevant links from a starting page. This tool simulates how a human browses by identifying and exploring related content, maintaining context between pages.', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'Starting URL for the navigation session (must start with http:// or https://)' }, keywords: { type: 'array', items: { type: 'string' }, description: 'Keywords to guide link relevance assessment. Links containing or related to these keywords will be prioritized.' }, depth: { type: 'number', description: 'Maximum navigation depth (1-3). Higher values explore more links but take longer. Default is 1.' }, max_links: { type: 'number', description: 'Maximum number of links to follow per page (1-5). Default is 3.' }, session_id: { type: 'string', description: 'Optional session ID to continue a previous browsing session. If not provided, a new session will be created.' 
}, stay_on_domain: { type: 'boolean', description: 'Whether to stay on the same domain as the starting URL. Default is false.' } }, required: ['url'] } }, { name: 'structured_content_extraction', description: 'Extract content from a webpage with enhanced structure preservation, maintaining tables, lists, hierarchies, and image context. This tool provides a richer representation of webpage content than standard extraction.', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'Full URL of the webpage to extract content from (must start with http:// or https://)' }, format: { type: 'string', description: 'Output format for the extracted content. Options: "markdown" (default), "html", or "text".' }, preserve_tables: { type: 'boolean', description: 'Whether to preserve table structure in the output. Default is true.' }, extract_images: { type: 'boolean', description: 'Whether to extract images with context information. Default is true.' }, analyze_links: { type: 'boolean', description: 'Whether to analyze and include links with context. Default is true.' } }, required: ['url'] } }, { name: 'enhanced_synthesis', description: 'Create an enhanced synthesis of content from multiple sources with advanced capabilities like contradiction detection and source credibility assessment. This tool provides a more nuanced and comprehensive synthesis than standard approaches.', inputSchema: { type: 'object', properties: { urls: { type: 'array', items: { type: 'string' }, description: 'Array of webpage URLs to extract and synthesize content from. Each URL must be public and start with http:// or https://.' }, focus: { type: 'string', description: 'A specific aspect or question to focus on when synthesizing the content.' }, structure: { type: 'string', description: 'The structure to use for the synthesized content: "chronological", "thematic", "compare_contrast", or "question_answer". Default is "thematic".' 
}, detect_contradictions: { type: 'boolean', description: 'Whether to detect and highlight contradictions between sources. Default is true.' }, assess_credibility: { type: 'boolean', description: 'Whether to include source credibility assessment. Default is true.' }, compare_by: { type: 'array', items: { type: 'string' }, description: 'Aspects to compare when using compare_contrast structure. For example: ["methodology", "results", "limitations"].' } }, required: ['urls'] } ] })); // Register tool call handler this.server.setRequestHandler(CallToolRequestSchema, async (request: any) => { switch (request.params.name) { case 'google_search': if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'query' in request.params.arguments) { return this.handleSearch({ query: String(request.params.arguments.query), num_results: typeof request.params.arguments.num_results === 'number' ? request.params.arguments.num_results : undefined, filters: { site: request.params.arguments.site ? String(request.params.arguments.site) : undefined, language: request.params.arguments.language ? String(request.params.arguments.language) : undefined, dateRestrict: request.params.arguments.dateRestrict ? String(request.params.arguments.dateRestrict) : undefined, exactTerms: request.params.arguments.exactTerms ? String(request.params.arguments.exactTerms) : undefined, resultType: request.params.arguments.resultType ? String(request.params.arguments.resultType) : undefined, page: typeof request.params.arguments.page === 'number' ? request.params.arguments.page : undefined, resultsPerPage: typeof request.params.arguments.resultsPerPage === 'number' ? request.params.arguments.resultsPerPage : undefined, sort: request.params.arguments.sort ? 
String(request.params.arguments.sort) : undefined } }); } throw new Error('Invalid arguments for google_search tool'); case 'extract_webpage_content': if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'url' in request.params.arguments) { return this.handleAnalyzeWebpage({ url: String(request.params.arguments.url), format: request.params.arguments.format ? String(request.params.arguments.format) as OutputFormat : 'markdown', full_content: request.params.arguments.full_content === true }); } throw new Error('Invalid arguments for extract_webpage_content tool'); case 'extract_multiple_webpages': if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'urls' in request.params.arguments && Array.isArray(request.params.arguments.urls)) { return this.handleExtractMultipleWebpages({ urls: request.params.arguments.urls.map(String), format: request.params.arguments.format ? String(request.params.arguments.format) as OutputFormat : 'markdown' }); } throw new Error('Invalid arguments for extract_multiple_webpages tool'); case 'research_topic': if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'topic' in request.params.arguments) { return this.handleResearchTopic({ topic: String(request.params.arguments.topic), depth: request.params.arguments.depth ? String(request.params.arguments.depth) : 'intermediate', focus_areas: Array.isArray(request.params.arguments.focus_areas) ? request.params.arguments.focus_areas.map(String) : undefined, num_sources: typeof request.params.arguments.num_sources === 'number' ? 
request.params.arguments.num_sources : 5 }); } throw new Error('Invalid arguments for research_topic tool'); case 'synthesize_content': if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'urls' in request.params.arguments && Array.isArray(request.params.arguments.urls)) { return this.handleSynthesizeContent({ urls: request.params.arguments.urls.map(String), focus: request.params.arguments.focus ? String(request.params.arguments.focus) : undefined, structure: request.params.arguments.structure ? String(request.params.arguments.structure) : 'thematic' }); } throw new Error('Invalid arguments for synthesize_content tool'); case 'summarize_webpage': if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'url' in request.params.arguments) { return this.handleSummarizeWebpage({ url: String(request.params.arguments.url), length: request.params.arguments.length ? String(request.params.arguments.length) : 'medium', focus: request.params.arguments.focus ? String(request.params.arguments.focus) : undefined }); } throw new Error('Invalid arguments for summarize_webpage tool'); case 'contextual_navigation': if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'url' in request.params.arguments) { return this.handleContextualNavigation({ url: String(request.params.arguments.url), keywords: Array.isArray(request.params.arguments.keywords) ? request.params.arguments.keywords.map(String) : undefined, depth: typeof request.params.arguments.depth === 'number' ? request.params.arguments.depth : 1, maxLinks: typeof request.params.arguments.max_links === 'number' ? request.params.arguments.max_links : 3, sessionId: request.params.arguments.session_id ? 
String(request.params.arguments.session_id) : undefined, stayOnDomain: request.params.arguments.stay_on_domain === true }); } throw new Error('Invalid arguments for contextual_navigation tool'); case 'structured_content_extraction': if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'url' in request.params.arguments) { return this.handleStructuredContentExtraction({ url: String(request.params.arguments.url), format: request.params.arguments.format ? String(request.params.arguments.format) as OutputFormat : 'markdown', preserveTables: request.params.arguments.preserve_tables !== false, extractImages: request.params.arguments.extract_images !== false, analyzeLinks: request.params.arguments.analyze_links !== false }); } throw new Error('Invalid arguments for structured_content_extraction tool'); case 'enhanced_synthesis': if (typeof request.params.arguments === 'object' && request.params.arguments !== null && 'urls' in request.params.arguments && Array.isArray(request.params.arguments.urls)) { return this.handleEnhancedSynthesis({ urls: request.params.arguments.urls.map(String), focus: request.params.arguments.focus ? String(request.params.arguments.focus) : undefined, structure: request.params.arguments.structure ? String(request.params.arguments.structure) : 'thematic', detectContradictions: request.params.arguments.detect_contradictions !== false, assessCredibility: request.params.arguments.assess_credibility !== false, compareBy: Array.isArray(request.params.arguments.compare_by) ? 
request.params.arguments.compare_by.map(String) : undefined }); } throw new Error('Invalid arguments for enhanced_synthesis tool'); default: throw new Error(`Unknown tool: ${request.params.name}`); } }); } private async handleSearch(args: { query: string; num_results?: number; filters?: { site?: string; language?: string; dateRestrict?: string; exactTerms?: string; resultType?: string; page?: number; resultsPerPage?: number; sort?: string; } }) { try { const { results, pagination, categories } = await this.searchService.search(args.query, args.num_results, args.filters); if (results.length === 0) { return { content: [{ type: 'text', text: 'No results found. Try:\n- Using different keywords\n- Removing quotes from non-exact phrases\n- Using more general terms' }], isError: true }; } // Format results in a more concise, readable way const formattedResults = results.map(result => ({ title: result.title, link: result.link, snippet: result.snippet, category: result.category })); // Format results in a more AI-friendly way let responseText = `Search results for "${args.query}":\n\n`; // Add category summary if available if (categories && categories.length > 0) { responseText += "Categories: " + categories.map(c => `${c.name} (${c.count})`).join(', ') + "\n\n"; } // Add pagination info if (pagination) { responseText += `Showing page ${pagination.currentPage}${pagination.totalResults ? ` of approximately ${pagination.totalResults} results` : ''}\n\n`; } // Add each result in a readable format formattedResults.forEach((result, index) => { responseText += `${index + 1}. ${result.title}\n`; responseText += ` URL: ${result.link}\n`; responseText += ` ${result.snippet}\n\n`; }); // Add navigation hints if pagination exists if (pagination && (pagination.hasNextPage || pagination.hasPreviousPage)) { responseText += "Navigation: "; if (pagination.hasPreviousPage) { responseText += "Use 'page: " + (pagination.currentPage - 1) + "' for previous results. 
"; } if (pagination.hasNextPage) { responseText += "Use 'page: " + (pagination.currentPage + 1) + "' for more results."; } responseText += "\n"; } return { content: [ { type: 'text', text: responseText, }, ], }; } catch (error) { const message = error instanceof Error ? error.message : 'Unknown error during search'; return { content: [{ type: 'text', text: message }], isError: true }; } }

  /**
   * Extract one webpage and render its title, description, stats and summary,
   * followed by either the full content or a preview.
   *
   * @param args.url URL handed to the content extractor.
   * @param args.format Optional output format forwarded to the extractor.
   * @param args.full_content When truthy the full content is included; otherwise only the preview.
   * @returns A text response; on failure an `isError` response with the error message plus troubleshooting hints.
   */
  private async handleAnalyzeWebpage(args: { url: string; format?: OutputFormat; full_content?: boolean }) {
    try {
      const content = await this.contentExtractor.extractContent(args.url, args.format);

      // Header: source, title, optional description and size stats
      let responseText = `Content from: ${content.url}\n\n`;
      responseText += `Title: ${content.title}\n`;
      if (content.description) {
        responseText += `Description: ${content.description}\n`;
      }
      responseText += `\nStats: ${content.stats.word_count} words, ${content.stats.approximate_chars} characters\n\n`;

      // Add the summary if available
      if (content.summary) {
        responseText += `Summary: ${content.summary}\n\n`;
      }

      // Add either the full content or just a preview
      if (args.full_content) {
        responseText += `Full Content:\n\n${content.content}`;
      } else {
        responseText += `Content Preview:\n${content.content_preview.first_500_chars}\n\n`;
        responseText += `Note: This is a preview of the content. For the full content, use the extract_webpage_content tool with full_content set to true.`;
      }

      return {
        content: [{ type: 'text', text: responseText }],
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
      const helpText = 'Common issues:\n- Check if the URL is accessible in a browser\n- Ensure the webpage is public\n- Try again if it\'s a temporary network issue';
      return {
        content: [{ type: 'text', text: `${errorMessage}\n\n${helpText}` }],
        isError: true,
      };
    }
  }

  /**
   * Research a topic: search, de-duplicate the hits by URL, extract the pages and
   * hand everything to the research enhancer to build a research document.
   *
   * @param args.topic Topic used for the main search and as prefix of every focus-area search.
   * @param args.depth Depth label forwarded to the enhancer; defaults to 'intermediate'.
   * @param args.focus_areas Optional extra search angles, each searched as `<topic> <area>`.
   * @param args.num_sources Maximum number of unique sources to use; defaults to 5.
   * @returns The research document as text, or an `isError` response when nothing is found or a step fails.
   */
  private async handleResearchTopic(args: {
    topic: string;
    depth?: string;
    focus_areas?: string[];
    num_sources?: number;
  }) {
    try {
      console.error(`Researching topic: "${args.topic}" with depth: ${args.depth || 'intermediate'}`);

      const sourceLimit = args.num_sources || 5;
      const focusAreas = args.focus_areas ?? [];

      // Step 1: Perform multiple searches to gather diverse information
      // The main topic search asks for half of the source budget (rounded up)
      const mainSearchResults = await this.searchService.search(args.topic, Math.ceil(sourceLimit / 2));
      const allResults = [...mainSearchResults.results];

      // If focus areas are provided, perform additional searches in parallel
      if (focusAreas.length > 0) {
        // Each focus area asks for an equal share of half the source budget (rounded up)
        const perAreaCount = Math.ceil(sourceLimit / focusAreas.length / 2);
        const focusAreasSearches = await Promise.all(
          focusAreas.map(area => this.searchService.search(`${args.topic} ${area}`, perAreaCount))
        );

        // Combine all results, main search first
        for (const result of focusAreasSearches) {
          allResults.push(...result.results);
        }
      }

      // Filter out duplicate URLs (first occurrence wins) and take top N results
      const uniqueUrls = new Set<string>();
      const filteredResults = allResults
        .filter(result => {
          if (uniqueUrls.has(result.link)) return false;
          uniqueUrls.add(result.link);
          return true;
        })
        .slice(0, sourceLimit);

      if (filteredResults.length === 0) {
        return {
          content: [{
            type: 'text',
            text: `No results found for "${args.topic}". Try using different keywords or a more general topic.`
          }],
          isError: true
        };
      }

      console.error(`Found ${filteredResults.length} unique sources for research`);

      // Step 2: Extract content from the sources
      const urls = filteredResults.map(result => result.link);
      console.error(`Extracting content from ${urls.length} sources...`);
      const contents = await this.contentExtractor.batchExtractContent(urls, 'markdown');

      // Step 3: Use the research enhancer to create a structured research document
      console.error(`Generating research document for "${args.topic}"`);
      const researchDocument = await this.researchEnhancer.createResearchDocument({
        topic: args.topic,
        depth: args.depth || 'intermediate',
        focus_areas: args.focus_areas,
        // Failed extractions are forwarded as { url, error } entries rather than dropped
        sources: Object.entries(contents).map(([url, content]) => {
          if ('error' in content) {
            return { url, error: content.error };
          }
          return {
            url,
            title: content.title,
            content: content.content,
            summary: content.summary || ''
          };
        })
      });

      return {
        content: [{ type: 'text', text: researchDocument }],
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
      return {
        content: [{ type: 'text', text: `Error researching "${args.topic}": ${errorMessage}` }],
        isError: true,
      };
    }
  }

  /**
   * Extract up to 10 URLs and ask the research enhancer to synthesize them.
   *
   * @param args.urls URLs to extract; more than 10 is rejected with an `isError` response.
   * @param args.focus Optional focus forwarded to the enhancer.
   * @param args.structure Structure name forwarded to the enhancer; defaults to 'thematic'.
   * @returns The synthesized text, or an `isError` response when no URL could be extracted or a step fails.
   */
  private async handleSynthesizeContent(args: {
    urls: string[];
    focus?: string;
    structure?: string;
  }) {
    if (args.urls.length > 10) {
      return {
        content: [{
          type: 'text',
          text: 'Maximum 10 URLs allowed per synthesis to maintain quality. Please reduce the number of URLs.'
        }],
        isError: true
      };
    }

    try {
      console.error(`Synthesizing content from ${args.urls.length} sources using ${args.structure || 'thematic'} structure`);

      // Extract content from all URLs
      console.error('Extracting content from provided URLs...');
      const contents = await this.contentExtractor.batchExtractContent(args.urls, 'markdown');

      // Count the successful extractions; used only for the emptiness check and the log line
      const validContents = Object.entries(contents).filter(
        ([_, content]) => !('error' in content)
      );

      if (validContents.length === 0) {
        return {
          content: [{
            type: 'text',
            text: 'Could not extract valid content from any of the provided URLs. Please check the URLs and try again.'
          }],
          isError: true
        };
      }

      console.error(`Successfully extracted content from ${validContents.length}/${args.urls.length} sources`);

      // Use the research enhancer to synthesize the content
      console.error(`Generating synthesized content with focus: "${args.focus || 'general'}"`);
      const synthesizedContent = await this.researchEnhancer.synthesizeContent({
        // All entries are forwarded; failed ones are passed as { url, error }
        sources: Object.entries(contents).map(([url, content]) => {
          if ('error' in content) {
            return { url, error: content.error };
          }
          return {
            url,
            title: content.title,
            content: content.content,
            summary: content.summary || ''
          };
        }),
        focus: args.focus,
        structure: args.structure || 'thematic'
      });

      return {
        content: [{ type: 'text', text: synthesizedContent }],
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
      return {
        content: [{ type: 'text', text: `Error synthesizing content: ${errorMessage}` }],
        isError: true,
      };
    }
  }

  /**
   * Batch-extract up to 5 webpages and render one markdown section per page
   * (title, description, word count, summary and a content preview).
   *
   * @param args.urls URLs to extract; more than 5 is rejected with an `isError` response.
   * @param args.format Output format forwarded to the extractor; defaults to 'markdown'.
   * @returns A markdown text response; pages that failed are listed with their error.
   */
  private async handleExtractMultipleWebpages(args: {
    urls: string[];
    format?: OutputFormat;
  }) {
    try {
      // Limit to 5 URLs max for performance
      if (args.urls.length > 5) {
        return {
          content: [{
            type: 'text',
            text: 'Maximum 5 URLs allowed per extraction to maintain performance. Please reduce the number of URLs.'
          }],
          isError: true
        };
      }

      console.error(`Extracting content from ${args.urls.length} webpages...`);

      // Use the batch extraction method from ContentExtractor
      const contents = await this.contentExtractor.batchExtractContent(args.urls, args.format || 'markdown');

      // Format the results in a readable way
      let responseText = `# Content Extracted from ${args.urls.length} Webpages\n\n`;

      // Materialize the entries once so the last index is not recomputed per page
      const entries = Object.entries(contents);
      const lastIndex = entries.length - 1;

      // Add each webpage's content with clear separation
      entries.forEach(([url, content], index) => {
        responseText += `## ${index + 1}. ${url}\n\n`;

        if ('error' in content) {
          // Failed pages get the error only (and no trailing separator)
          responseText += `**Error**: ${content.error}\n\n`;
          return;
        }

        responseText += `**Title**: ${content.title}\n`;
        if (content.description) {
          responseText += `**Description**: ${content.description}\n`;
        }
        responseText += `**Word Count**: ${content.stats.word_count}\n\n`;

        // Add summary if available
        if (content.summary) {
          responseText += `### Summary\n\n${content.summary}\n\n`;
        }

        // Add content preview
        responseText += `### Content Preview\n\n${content.content_preview.first_500_chars}...\n\n`;

        // Add separator between pages
        if (index < lastIndex) {
          responseText += `---\n\n`;
        }
      });

      return {
        content: [{ type: 'text', text: responseText }],
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
      return {
        content: [{ type: 'text', text: `Error extracting content from multiple webpages: ${errorMessage}` }],
        isError: true,
      };
    }
  }

  /**
   * Extract a webpage and render a markdown summary with metadata and key points.
   *
   * The summary is the extractor's own summary when present, otherwise the first three
   * paragraphs longer than 50 characters. The word-count target derived from `length`
   * is only reported in the output; the summary text is not trimmed to it.
   *
   * @param args.url URL to summarize.
   * @param args.length 'short' (250), 'long' (1000) or anything else / omitted (500); case-insensitive.
   * @param args.focus Optional focus label, echoed in the output.
   * @returns A markdown text response, or an `isError` response on failure.
   */
  private async handleSummarizeWebpage(args: {
    url: string;
    length?: string;
    focus?: string;
  }) {
    try {
      console.error(`Summarizing webpage: ${args.url} with length: ${args.length || 'medium'}`);

      // Extract content from the webpage
      const content = await this.contentExtractor.extractContent(args.url, 'markdown');

      // Determine word count target based on requested length
      let wordCountTarget: number;
      switch (args.length?.toLowerCase() || 'medium') {
        case 'short':
          wordCountTarget = 250;
          break;
        case 'long':
          wordCountTarget = 1000;
          break;
        case 'medium':
        default:
          wordCountTarget = 500;
          break;
      }

      let summary = '';
      if (content.summary) {
        // Use existing summary as a starting point
        summary = content.summary;
      } else {
        // Extract first few paragraphs if no summary is available
        const paragraphs = content.content.split('\n\n').filter(p => p.length > 50);
        summary = paragraphs.slice(0, 3).join('\n\n');
      }

      // Format the response
      let responseText = `# Summary of [${content.title}](${args.url})\n\n`;

      // Add metadata
      responseText += `**Source**: ${args.url}\n`;
      responseText += `**Word Count**: ${content.stats.word_count} words in original content\n`;
      responseText += `**Summary Length**: ${args.length || 'medium'} (target: ~${wordCountTarget} words)\n`;
      if (args.focus) {
        responseText += `**Focus**: ${args.focus}\n`;
      }

      responseText += `\n## Summary\n\n${summary}\n\n`;

      // Add key points section: the first five headings, when the extractor reports any
      if (content.structure?.headings && content.structure.headings.length > 0) {
        responseText += `## Key Points\n\n`;
        content.structure.headings.slice(0, 5).forEach(heading => {
          responseText += `- ${heading}\n`;
        });
        responseText += '\n';
      }

      // Add a note about using extract_webpage_content for more details
      responseText += `\n---\n*For complete content, use the \`extract_webpage_content\` tool with the same URL.*`;

      return {
        content: [{ type: 'text', text: responseText }],
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
      return {
        content: [{ type: 'text', text: `Error summarizing webpage: ${errorMessage}` }],
        isError: true,
      };
    }
  }

  /**
   * Handle contextual navigation request.
   *
   * Follows links from a starting URL through the navigation service inside a browsing
   * session, then renders the visited pages, related topics and navigation path.
   *
   * @param args.url Starting URL.
   * @param args.keywords Optional keywords; also stored as the topic of a newly created session.
   * @param args.depth Requested depth, clamped to 1-3 (defaults to 1).
   * @param args.maxLinks Requested link budget, clamped to 1-5 (defaults to 3).
   * @param args.sessionId Existing session to continue; an unknown or missing id creates a new session.
   * @param args.stayOnDomain Forwarded unchanged to the navigation service.
   * @returns A markdown text response, or an `isError` response on failure.
   */
  private async handleContextualNavigation(args: {
    url: string;
    keywords?: string[];
    depth?: number;
    maxLinks?: number;
    sessionId?: string;
    stayOnDomain?: boolean;
  }): Promise<any> {
    try {
      // Validate depth (1-3)
      const depth = Math.max(1, Math.min(3, args.depth || 1));

      // Validate max links (1-5)
      const maxLinks = Math.max(1, Math.min(5, args.maxLinks || 3));

      // Log the effective depth; the raw argument may be undefined or out of range
      console.error(`Starting contextual navigation from ${args.url} with depth ${depth}`);

      // Use existing session or create a new one
      let sessionId = args.sessionId;
      const existingSession = sessionId ? this.activeSessions.get(sessionId) : undefined;

      if (!sessionId || !existingSession) {
        sessionId = uuidv4();
        this.activeSessions.set(sessionId, {
          id: sessionId,
          topic: args.keywords?.join(', '),
          lastActivity: new Date()
        });
        console.error(`Created new browsing session: ${sessionId}`);
      } else {
        // Update last activity
        existingSession.lastActivity = new Date();
        console.error(`Using existing browsing session: ${sessionId}`);
      }

      // Call the navigation service
      const result = await this.navigationService.followLinks(sessionId, {
        url: args.url,
        keywords: args.keywords,
        maxLinksToFollow: maxLinks,
        depth,
        stayOnDomain: args.stayOnDomain
      });

      // Format the response
      let responseText = `# Contextual Navigation Results\n\n`;
      responseText += `**Starting URL**: [${args.url}](${args.url})\n`;
      responseText += `**Session ID**: \`${sessionId}\` (can be used to continue this browsing session)\n`;
      responseText += `**Depth**: ${depth}\n`;
      responseText += `**Keywords**: ${args.keywords?.join(', ') || 'None'}\n\n`;

      responseText += `## Pages Visited\n\n`;
      result.pagesVisited.forEach((page, index) => {
        responseText += `### ${index + 1}. [${page.title}](${page.url})\n\n`;
        responseText += `**Relevance**: ${Math.round(page.relevance * 100)}%\n\n`;
        responseText += `${page.summary}\n\n`;
      });

      if (result.relatedTopics.length > 0) {
        responseText += `## Related Topics\n\n`;
        result.relatedTopics.forEach(topic => {
          responseText += `- ${topic}\n`;
        });
        responseText += `\n`;
      }

      responseText += `## Navigation Path\n\n`;
      result.navigationPath.forEach((url, index) => {
        // Link the step to its page title when the URL was actually visited
        const page = result.pagesVisited.find(p => p.url === url);
        responseText += `${index + 1}. ${page ? `[${page.title}](${url})` : url}\n`;
      });

      return {
        content: [{ type: 'text', text: responseText }],
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
      return {
        content: [{ type: 'text', text: `Error during contextual navigation: ${errorMessage}` }],
        isError: true,
      };
    }
  }

  /**
   * Handle structured content extraction request.
   *
   * Runs the enhanced extractor on one URL and renders summary and main content, plus
   * optional tables, images and links sections and a credibility section when present.
   *
   * @param args.url URL to extract.
   * @param args.format Output format forwarded to the extractor; defaults to 'markdown'.
   * @param args.preserveTables Include the Tables section when tables were found.
   * @param args.extractImages Include the Images section when images were found.
   * @param args.analyzeLinks Include the Links section (first five links) when links were found.
   * @returns A markdown text response, or an `isError` response on failure.
   */
  private async handleStructuredContentExtraction(args: {
    url: string;
    format?: OutputFormat;
    preserveTables?: boolean;
    extractImages?: boolean;
    analyzeLinks?: boolean;
  }): Promise<any> {
    try {
      console.error(`Extracting structured content from ${args.url}`);

      // Extract enhanced content
      const content = await this.enhancedContentExtractor.extractEnhancedContent(
        args.url,
        args.format || 'markdown'
      );

      // Format the response
      let responseText = `# Structured Content: ${content.title}\n\n`;
      responseText += `**Source**: [${args.url}](${args.url})\n`;
      responseText += `**Extracted on**: ${new Date().toISOString()}\n\n`;

      // Add summary if available
      if (content.summary) {
        responseText += `## Summary\n\n${content.summary}\n\n`;
      }

      // Add main content
      responseText += `## Main Content\n\n${content.content}\n\n`;

      // Add tables if requested
      if (args.preserveTables && content.structuredData.tables.length > 0) {
        responseText += `## Tables\n\n`;
        content.structuredData.tables.forEach((table, index) => {
          responseText += `### Table ${index + 1}: ${table.caption || 'Untitled'}\n\n`;
          responseText += table.markdownRepresentation + '\n\n';
        });
      }

      // Add images if requested
      if (args.extractImages && content.images.length > 0) {
        responseText += `## Images\n\n`;
        content.images.forEach((image, index) => {
          responseText += `### Image ${index + 1}\n\n`;
          responseText += `**URL**: ${image.url}\n`;
          responseText += `**Alt text**: ${image.alt || image.generatedAlt || 'No description available'}\n`;
          if (image.position.nearestHeading) {
            responseText += `**Section**: ${image.position.nearestHeading}\n`;
          }
          responseText += `**Context**: ${image.context}\n\n`;
        });
      }

      // Add links if requested
      if (args.analyzeLinks && content.links.length > 0) {
        responseText += `## Links\n\n`;

        // Only the first five links are listed; the total is reported below
        const topLinks = content.links.slice(0, 5);
        responseText += `### Top Links\n\n`;
        topLinks.forEach((link, index) => {
          responseText += `${index + 1}. [${link.text || link.url}](${link.url})\n`;
          responseText += ` Context: ${link.context.substring(0, 100)}...\n\n`;
        });

        responseText += `**Total Links**: ${content.links.length}\n\n`;
      }

      // Add source credibility if available
      if (content.sourceCredibility) {
        responseText += `## Source Credibility\n\n`;
        responseText += `**Credibility Score**: ${Math.round(content.sourceCredibility.score * 100)}%\n\n`;

        if (content.sourceCredibility.factors.length > 0) {
          responseText += `**Factors**:\n\n`;
          content.sourceCredibility.factors.forEach(factor => {
            responseText += `- ${factor}\n`;
          });
        }
      }

      return {
        content: [{ type: 'text', text: responseText }],
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
      return {
        content: [{ type: 'text', text: `Error extracting structured content: ${errorMessage}` }],
        isError: true,
      };
    }
  }

  /**
   * Handle enhanced synthesis request.
   *
   * Extracts every URL with the enhanced extractor in parallel, drops the ones that
   * failed, and passes the rest to the multi-source synthesizer.
   *
   * @param args.urls URLs to synthesize; more than 10 is rejected with an `isError` response.
   * @param args.focus Optional focus forwarded to the synthesizer.
   * @param args.structure Structure name; defaults to 'thematic'.
   * @param args.detectContradictions Enabled unless explicitly `false`.
   * @param args.assessCredibility Enabled unless explicitly `false`.
   * @param args.compareBy Optional comparison dimensions forwarded to the synthesizer.
   * @returns The synthesized document as text, or an `isError` response on failure.
   */
  private async handleEnhancedSynthesis(args: {
    urls: string[];
    focus?: string;
    structure?: string;
    detectContradictions?: boolean;
    assessCredibility?: boolean;
    compareBy?: string[];
  }): Promise<any> {
    try {
      console.error(`Performing enhanced synthesis of ${args.urls.length} sources`);

      if (args.urls.length > 10) {
        return {
          content: [{
            type: 'text',
            text: 'Maximum 10 URLs allowed per synthesis to maintain quality. Please reduce the number of URLs.'
          }],
          isError: true
        };
      }

      // Extract enhanced content from all URLs. A failed extraction resolves to an empty
      // placeholder carrying an `error` field, so one bad URL does not reject Promise.all.
      const contentPromises = args.urls.map(url =>
        this.enhancedContentExtractor.extractEnhancedContent(url, 'markdown')
          .catch(error => ({
            url,
            title: url,
            content: '',
            format: 'markdown' as OutputFormat,
            description: '',
            meta_tags: {},
            stats: { word_count: 0, approximate_chars: 0 },
            content_preview: { first_500_chars: '' },
            links: [],
            images: [],
            structuredData: {
              tables: [],
              lists: [],
              hierarchies: [],
              keyValuePairs: []
            },
            error: error instanceof Error ? error.message : 'Unknown error'
          }))
      );

      const sources = await Promise.all(contentPromises);

      // Filter out failed extractions
      const validSources = sources.filter(source => !('error' in source));

      if (validSources.length === 0) {
        return {
          content: [{
            type: 'text',
            text: 'Could not extract valid content from any of the provided URLs. Please check the URLs and try again.'
          }],
          isError: true
        };
      }

      // Perform enhanced synthesis
      const result = await this.multiSourceSynthesizer.synthesize({
        sources: validSources,
        focus: args.focus,
        structure: args.structure || 'thematic',
        compareBy: args.compareBy,
        detectContradictions: args.detectContradictions !== false,
        includeSourceCredibility: args.assessCredibility !== false,
        visualizeRelationships: true
      });

      return {
        content: [{ type: 'text', text: result.document }],
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
      return {
        content: [{ type: 'text', text: `Error performing enhanced synthesis: ${errorMessage}` }],
        isError: true,
      };
    }
  }

  /**
   * Connect the server to a stdio transport and keep the process alive.
   *
   * On SIGINT the server is closed and the process exits with code 0 once the close
   * has settled. If connecting fails, the error is logged and the process exits with code 1.
   */
  async start() {
    try {
      const transport = new StdioServerTransport();
      await this.server.connect(transport);
      console.error('Google Research MCP server running (Research Component)');

      // Add a no-op interval to keep the Node.js event loop active
      // This prevents the process from exiting after initialization
      setInterval(() => {}, 60000);

      process.on('SIGINT', () => {
        // Exit only after close() settles; exiting synchronously would cut the shutdown short
        this.server
          .close()
          .catch(console.error)
          .finally(() => process.exit(0));
      });
    } catch (error: unknown) {
      if (error instanceof Error) {
        console.error('Failed to start MCP server:', error.message);
      } else {
        console.error('Failed to start MCP server: Unknown error');
      }
      process.exit(1);
    }
  }
}

// Start the server
const server = new GoogleResearchServer();
server.start().catch(console.error);

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mixelpixx/Google-Research-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.