Skip to main content
Glama

Scholarly Research MCP Server

by aringadre76
index.ts (30.6 kB)
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { PubMedAdapter } from './adapters/pubmed';
import { GoogleScholarAdapter } from './adapters/google-scholar';
import { ArXivAdapter } from './adapters/arxiv';
import { UnifiedSearchAdapter } from './adapters/unified-search';
import { EnhancedUnifiedSearchAdapter } from './adapters/enhanced-unified-search';
import { PreferenceAwareUnifiedSearchAdapter } from './adapters/preference-aware-unified-search';
import { UserPreferencesManager } from './preferences/user-preferences';
import { z } from 'zod';

/**
 * Wraps a string in the single-item text content payload that every tool
 * handler in this file returns.
 */
function textResult(text: string) {
  return { content: [{ type: 'text' as const, text }] };
}

/** Returns `error.message` for Error instances and 'Unknown error' otherwise. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : 'Unknown error';
}

/**
 * Builds the multi-line text used by the placeholder `web_research` actions:
 * a bold title, one line per field, then a closing note.
 */
function placeholderReport(title: string, fields: string[], note: string): string {
  return [`**${title}**`, '', ...fields, '', `Note: ${note}`].join('\n');
}

/**
 * Resolves a paper from a user-supplied identifier.
 *
 * Dispatch order (first match wins):
 *   1. starts with 'PMC'              -> pubmedAdapter.getPaperByPMCID
 *   2. digits only                    -> pubmedAdapter.getPaperById
 *   3. starts with '10.' or 'doi:'    -> pubmedAdapter.getPaperByDOI (first 'doi:' removed)
 *   4. starts with 'arxiv:' or has '/' -> arxivAdapter.getPaperById
 *
 * @returns `{ paper }` on success, or `{ failure }` holding the ready-to-return
 *          tool result when the identifier matches no format or the adapter
 *          returned a falsy value.
 */
async function lookupPaper(
  identifier: string,
  pubmedAdapter: PubMedAdapter,
  arxivAdapter: ArXivAdapter,
) {
  let paper;
  if (identifier.startsWith('PMC')) {
    paper = await pubmedAdapter.getPaperByPMCID(identifier);
  } else if (/^\d+$/.test(identifier)) {
    paper = await pubmedAdapter.getPaperById(identifier);
  } else if (identifier.startsWith('10.') || identifier.startsWith('doi:')) {
    paper = await pubmedAdapter.getPaperByDOI(identifier.replace('doi:', ''));
  } else if (identifier.startsWith('arxiv:') || identifier.includes('/')) {
    paper = await arxivAdapter.getPaperById(identifier);
  } else {
    return {
      failure: textResult('Invalid identifier format. Please use PMID, PMCID, DOI, or ArXiv ID.'),
    };
  }

  if (!paper) {
    return { failure: textResult(`No paper found with identifier: ${identifier}`) };
  }
  return { paper };
}

/**
 * Registers `research_search`: queries each requested source and returns a
 * numbered, human-readable list of the combined results.
 */
function registerResearchSearch(
  mcpServer: McpServer,
  pubmedAdapter: PubMedAdapter,
  googleScholarAdapter: GoogleScholarAdapter,
  arxivAdapter: ArXivAdapter,
): void {
  mcpServer.tool(
    'research_search',
    'Comprehensive research paper search across multiple sources with advanced filtering and unified results',
    {
      query: z.string().describe('Search query for papers'),
      maxResults: z.number().optional().describe('Maximum number of results (default: 20)'),
      startDate: z.string().optional().describe('Start date for publication range (YYYY/MM/DD format)'),
      endDate: z.string().optional().describe('End date for publication range (YYYY/MM/DD format)'),
      journal: z.string().optional().describe('Filter by specific journal'),
      author: z.string().optional().describe('Filter by specific author'),
      // NOTE(review): 'jstor' is accepted here but no branch below searches it.
      sources: z.array(z.enum(['pubmed', 'google-scholar', 'arxiv', 'jstor'])).optional().describe('Sources to search (default: all)'),
      includeAbstracts: z.boolean().optional().describe('Include paper abstracts in results (default: true)'),
      // NOTE(review): sortBy is part of the schema but is not applied to the results.
      sortBy: z.enum(['relevance', 'date', 'citations']).optional().describe('Sort order (default: relevance)')
    },
    async ({
      query,
      maxResults = 20,
      startDate,
      endDate,
      journal,
      author,
      sources = ['pubmed', 'google-scholar', 'arxiv'],
      includeAbstracts = true
    }) => {
      try {
        // The result budget is split evenly across the requested sources.
        const searchParams = {
          query,
          maxResults: Math.ceil(maxResults / sources.length),
          startDate,
          endDate,
          journal,
          author
        };
        const allPapers = [];

        // A PubMed failure is not caught here: it propagates to the outer
        // catch and fails the whole search, unlike the two sources below.
        if (sources.includes('pubmed')) {
          const pubmedPapers = await pubmedAdapter.searchPapers(searchParams);
          allPapers.push(...pubmedPapers);
        }

        if (sources.includes('google-scholar')) {
          try {
            const scholarPapers = await googleScholarAdapter.searchPapers(searchParams);
            allPapers.push(...scholarPapers);
          } catch (error) {
            console.warn('Google Scholar search failed:', error);
          }
        }

        if (sources.includes('arxiv')) {
          try {
            const arxivPapers = await arxivAdapter.searchPapers(searchParams);
            allPapers.push(...arxivPapers);
          } catch (error) {
            console.warn('ArXiv search failed:', error);
          }
        }

        if (allPapers.length === 0) {
          return textResult('No papers found matching your search criteria.');
        }

        const resultsText = allPapers.map((paper, index) => {
          const authors = paper.authors.length > 0 ? paper.authors.join(', ') : 'Unknown authors';
          const pmcInfo = paper.pmcid ? `\n - PMC ID: ${paper.pmcid} (Full text likely available)` : '';
          const doiInfo = paper.doi ? `\n - DOI: ${paper.doi}` : '';
          const abstract = includeAbstracts && paper.abstract
            ? `\n - Abstract: ${paper.abstract.substring(0, 200)}...`
            : '';

          // One field per line; the trailing empty entry ends each paper with a
          // newline so joined entries are separated by a blank line.
          return [
            `${index + 1}. **${paper.title}**`,
            ` - Authors: ${authors}`,
            ` - Journal: ${paper.journal}`,
            ` - Publication Date: ${paper.publicationDate}`,
            ` - PMID: ${paper.pmid}${pmcInfo}${doiInfo}${abstract}`,
            ''
          ].join('\n');
        }).join('\n');

        return textResult(`Found ${allPapers.length} papers from ${sources.join(', ')}:\n\n${resultsText}`);
      } catch (error) {
        return textResult(`Error searching papers: ${describeError(error)}`);
      }
    }
  );
}

/**
 * Registers `paper_analysis`: resolves a paper by identifier and appends the
 * sections selected by `analysisType` (full text excerpt, quotes, statistics,
 * findings) to a metadata header.
 */
function registerPaperAnalysis(
  mcpServer: McpServer,
  pubmedAdapter: PubMedAdapter,
  arxivAdapter: ArXivAdapter,
): void {
  mcpServer.tool(
    'paper_analysis',
    'Get comprehensive paper information, full text, and analysis including quotes, statistics, and findings',
    {
      identifier: z.string().describe('Paper identifier (PMID, PMCID, DOI, or ArXiv ID)'),
      analysisType: z.enum(['basic', 'full-text', 'quotes', 'statistics', 'findings', 'complete']).optional().describe('Type of analysis to perform (default: complete)'),
      maxQuotes: z.number().optional().describe('Maximum number of quotes to extract (default: 15)'),
      // NOTE(review): maxSectionLength is part of the schema but is not used by the handler.
      maxSectionLength: z.number().optional().describe('Maximum length of each section (default: 1000 characters)')
    },
    async ({ identifier, analysisType = 'complete', maxQuotes = 15 }) => {
      try {
        const lookup = await lookupPaper(identifier, pubmedAdapter, arxivAdapter);
        if (lookup.failure) {
          return lookup.failure;
        }
        const paper = lookup.paper;

        let analysisText = [
          `**${paper.title}**`,
          `- Authors: ${paper.authors.join(', ')}`,
          `- Journal: ${paper.journal}`,
          `- Publication Date: ${paper.publicationDate}`,
          `- PMID: ${paper.pmid}`,
          `- PMC ID: ${paper.pmcid || 'None'}`,
          `- DOI: ${paper.doi || 'None'}`,
          `- Abstract: ${paper.abstract || 'Not available'}`
        ].join('\n');

        if (analysisType === 'full-text' || analysisType === 'complete') {
          try {
            const fullText = await pubmedAdapter.getFullText(paper.pmid, 50000);
            if (fullText) {
              analysisText += `\n\n**Full Text (Excerpt)**\n${fullText.substring(0, 2000)}...`;
            }
          } catch (error) {
            analysisText += `\n\n**Full Text**: Not available (${describeError(error)})`;
          }
        }

        // The three evidence sections differ only in the evidence type requested,
        // the item limit, and their headings; each one fails independently.
        const evidenceSections = [
          { type: 'quotes', evidence: 'all', limit: maxQuotes, heading: 'Key Quotes and Evidence', label: 'Quotes' },
          { type: 'statistics', evidence: 'statistics', limit: 10, heading: 'Key Statistics', label: 'Statistics' },
          { type: 'findings', evidence: 'findings', limit: 10, heading: 'Key Findings', label: 'Findings' }
        ] as const;

        for (const section of evidenceSections) {
          if (analysisType !== section.type && analysisType !== 'complete') {
            continue;
          }
          try {
            const items = await pubmedAdapter.getEvidenceQuotes(paper.pmid, section.evidence, section.limit);
            if (items && items.length > 0) {
              const numbered = items.map((item, i) => `${i + 1}. ${item}`).join('\n');
              analysisText += `\n\n**${section.heading}**\n${numbered}`;
            }
          } catch (error) {
            analysisText += `\n\n**${section.label}**: Not available (${describeError(error)})`;
          }
        }

        return textResult(analysisText);
      } catch (error) {
        return textResult(`Error analyzing paper: ${describeError(error)}`);
      }
    }
  );
}

/**
 * Registers `citation_manager`: resolves a paper by identifier and, depending
 * on `action`, appends a formatted citation, a citation count, and/or a list
 * of related papers to a metadata header.
 */
function registerCitationManager(
  mcpServer: McpServer,
  pubmedAdapter: PubMedAdapter,
  arxivAdapter: ArXivAdapter,
): void {
  mcpServer.tool(
    'citation_manager',
    'Generate citations in multiple formats and get citation information including counts and related papers',
    {
      identifier: z.string().describe('Paper identifier (PMID, PMCID, DOI, or ArXiv ID)'),
      action: z.enum(['generate', 'count', 'related', 'all']).describe('Action to perform'),
      format: z.enum(['apa', 'mla', 'bibtex', 'endnote', 'ris']).optional().describe('Citation format (required for generate action)'),
      maxRelated: z.number().optional().describe('Maximum number of related papers (default: 10)')
    },
    async ({ identifier, action, format, maxRelated = 10 }) => {
      try {
        const lookup = await lookupPaper(identifier, pubmedAdapter, arxivAdapter);
        if (lookup.failure) {
          return lookup.failure;
        }
        const paper = lookup.paper;

        let resultText = [
          `**${paper.title}**`,
          `- Authors: ${paper.authors.join(', ')}`,
          `- Journal: ${paper.journal}`,
          `- Publication Date: ${paper.publicationDate}`,
          `- PMID: ${paper.pmid}`,
          `- PMC ID: ${paper.pmcid || 'None'}`,
          `- DOI: ${paper.doi || 'None'}`
        ].join('\n');

        if (action === 'generate' || action === 'all') {
          // 'all' also generates a citation, so it needs a format as well.
          if (!format) {
            return textResult('Format parameter is required for generate action. Please specify apa, mla, bibtex, endnote, or ris.');
          }
          try {
            const citation = await pubmedAdapter.getCitation(paper.pmid, format);
            resultText += `\n\n**Citation (${format.toUpperCase()})**\n${citation}`;
          } catch (error) {
            resultText += `\n\n**Citation (${format.toUpperCase()})**: Not available (${describeError(error)})`;
          }
        }

        if (action === 'count' || action === 'all') {
          try {
            const citationCount = await pubmedAdapter.getCitationCount(paper.pmid);
            resultText += `\n\n**Citation Count**: ${citationCount}`;
          } catch (error) {
            resultText += `\n\n**Citation Count**: Not available (${describeError(error)})`;
          }
        }

        if (action === 'related' || action === 'all') {
          try {
            const relatedPapers = await pubmedAdapter.getRelatedPapers(paper.pmid, 'pubmed', maxRelated);
            if (relatedPapers && relatedPapers.length > 0) {
              const numbered = relatedPapers
                .map((related, i) => `${i + 1}. ${related.title} (PMID: ${related.pmid})`)
                .join('\n');
              resultText += `\n\n**Related Papers**\n${numbered}`;
            } else {
              resultText += `\n\n**Related Papers**: None found`;
            }
          } catch (error) {
            resultText += `\n\n**Related Papers**: Not available (${describeError(error)})`;
          }
        }

        return textResult(resultText);
      } catch (error) {
        return textResult(`Error managing citations: ${describeError(error)}`);
      }
    }
  );
}

/**
 * Registers `research_preferences`: reads, updates, resets, exports, and
 * imports preferences through the supplied preferences manager.
 */
function registerResearchPreferences(
  mcpServer: McpServer,
  preferencesManager: UserPreferencesManager,
): void {
  mcpServer.tool(
    'research_preferences',
    'Manage research preferences including source priorities, search settings, display options, and caching',
    {
      action: z.enum(['get', 'set', 'reset', 'export', 'import']).describe('Action to perform'),
      category: z.enum(['source', 'search', 'display', 'cache', 'all']).optional().describe('Preference category to manage'),
      preferences: z.any().optional().describe('Preferences to set (JSON object for set/import actions)'),
      sourceName: z.string().optional().describe('Source name for source-specific preferences'),
      enabled: z.boolean().optional().describe('Whether to enable a source'),
      priority: z.number().optional().describe('Priority order (1 is highest)'),
      maxResults: z.number().optional().describe('Maximum results to fetch'),
      defaultMaxResults: z.number().optional().describe('Default maximum number of results'),
      defaultSortBy: z.enum(['relevance', 'date', 'citations']).optional().describe('Default sort order'),
      preferFirecrawl: z.boolean().optional().describe('Prefer Firecrawl over Puppeteer for Google Scholar'),
      enableDeduplication: z.boolean().optional().describe('Enable deduplication of results across sources'),
      showAbstracts: z.boolean().optional().describe('Whether to show abstracts in results'),
      showCitations: z.boolean().optional().describe('Whether to show citation counts'),
      showUrls: z.boolean().optional().describe('Whether to show URLs'),
      maxAbstractLength: z.number().optional().describe('Maximum length of abstracts to display'),
      cacheEnabled: z.boolean().optional().describe('Enable result caching'),
      cacheExpiry: z.number().optional().describe('Cache expiry time in hours')
    },
    async (params) => {
      try {
        const { action, category, preferences, ...otherParams } = params;

        switch (action) {
          case 'get': {
            if (!category) {
              return textResult('Please specify a category (source, search, display, cache, or all) to get preferences.');
            }

            // Sections are accumulated rather than returned one by one so that
            // category 'all' reports every section, not only the first.
            const sections: string[] = [];

            if (category === 'source' || category === 'all') {
              const sourcePrefs = await preferencesManager.getSourcePreferences();
              const sourceLines = Object.entries(sourcePrefs)
                .map(([source, prefs]) => `- ${source}: ${prefs.enabled ? 'Enabled' : 'Disabled'} (Priority: ${prefs.priority}, Max Results: ${prefs.maxResults})`)
                .join('\n');
              sections.push(`**Source Preferences**\n${sourceLines}`);
            }

            if (category === 'search' || category === 'all') {
              const searchPrefs = await preferencesManager.getSearchPreferences();
              sections.push([
                '**Search Preferences**',
                `- Default Max Results: ${searchPrefs.defaultMaxResults}`,
                `- Default Sort By: ${searchPrefs.defaultSortBy}`,
                `- Prefer Firecrawl: ${searchPrefs.preferFirecrawl}`,
                `- Enable Deduplication: ${searchPrefs.enableDeduplication}`
              ].join('\n'));
            }

            if (category === 'display' || category === 'all') {
              const displayPrefs = await preferencesManager.getDisplayPreferences();
              sections.push([
                '**Display Preferences**',
                `- Show Abstracts: ${displayPrefs.showAbstracts}`,
                `- Show Citations: ${displayPrefs.showCitations}`,
                `- Show URLs: ${displayPrefs.showUrls}`,
                `- Max Abstract Length: ${displayPrefs.maxAbstractLength}`
              ].join('\n'));
            }

            if (category === 'cache' || category === 'all') {
              const cachePrefs = await preferencesManager.getCachePreferences();
              sections.push([
                '**Cache Preferences**',
                `- Cache Enabled: ${cachePrefs.cacheEnabled}`,
                `- Cache Expiry: ${cachePrefs.cacheExpiry} hours`
              ].join('\n'));
            }

            return textResult(sections.join('\n\n'));
          }

          case 'set': {
            if (category === 'source' && otherParams.sourceName) {
              await preferencesManager.setSourcePreference(
                otherParams.sourceName,
                otherParams.enabled ?? true,
                otherParams.priority || 1,
                otherParams.maxResults || 20
              );
              return textResult(`Source preference for ${otherParams.sourceName} updated successfully.`);
            }

            if (category === 'search') {
              await preferencesManager.setSearchPreferences({
                defaultMaxResults: otherParams.defaultMaxResults,
                defaultSortBy: otherParams.defaultSortBy,
                preferFirecrawl: otherParams.preferFirecrawl,
                enableDeduplication: otherParams.enableDeduplication
              });
              return textResult('Search preferences updated successfully.');
            }

            if (category === 'display') {
              await preferencesManager.setDisplayPreferences({
                showAbstracts: otherParams.showAbstracts,
                showCitations: otherParams.showCitations,
                showUrls: otherParams.showUrls,
                maxAbstractLength: otherParams.maxAbstractLength
              });
              return textResult('Display preferences updated successfully.');
            }

            if (category === 'cache') {
              await preferencesManager.setCachePreferences({
                cacheEnabled: otherParams.cacheEnabled,
                cacheExpiry: otherParams.cacheExpiry
              });
              return textResult('Cache preferences updated successfully.');
            }

            return textResult('Please specify a category and provide the appropriate parameters to set preferences.');
          }

          case 'reset': {
            await preferencesManager.resetPreferences();
            return textResult('All preferences have been reset to default values.');
          }

          case 'export': {
            const exportedPrefs = await preferencesManager.exportPreferences();
            const json = JSON.stringify(exportedPrefs, null, 2);
            // The fence markers must sit on their own lines to render as a code block.
            return textResult(`**Exported Preferences**\n\n\`\`\`json\n${json}\n\`\`\``);
          }

          case 'import': {
            if (!preferences) {
              return textResult('Please provide preferences to import.');
            }
            await preferencesManager.importPreferences(preferences);
            return textResult('Preferences imported successfully.');
          }

          default:
            return textResult('Invalid action. Please use: get, set, reset, export, or import.');
        }
      } catch (error) {
        return textResult(`Error managing preferences: ${describeError(error)}`);
      }
    }
  );
}

/**
 * Registers `web_research`. Every action validates its required parameters and
 * then returns a placeholder description of the request; no Firecrawl call is
 * made from this handler.
 */
function registerWebResearch(mcpServer: McpServer): void {
  mcpServer.tool(
    'web_research',
    'Perform web-based research using Firecrawl for reliable content extraction and analysis',
    {
      action: z.enum(['scrape', 'search', 'extract', 'map', 'crawl']).describe('Action to perform'),
      url: z.string().optional().describe('URL to scrape or starting URL for mapping/crawling'),
      query: z.string().optional().describe('Search query for web search'),
      urls: z.array(z.string()).optional().describe('Array of URLs for batch operations'),
      prompt: z.string().optional().describe('Prompt for content extraction'),
      schema: z.any().optional().describe('JSON schema for structured extraction'),
      maxResults: z.number().optional().describe('Maximum number of results (default: 5)'),
      formats: z.array(z.string()).optional().describe('Content formats to extract (default: markdown)'),
      onlyMainContent: z.boolean().optional().describe('Extract only main content (default: true)'),
      waitFor: z.number().optional().describe('Time to wait for dynamic content in milliseconds'),
      actions: z.array(z.any()).optional().describe('Actions to perform before scraping'),
      mobile: z.boolean().optional().describe('Use mobile viewport (default: false)'),
      maxAge: z.number().optional().describe('Maximum age for cached content in milliseconds (default: 172800000)'),
      maxDiscoveryDepth: z.number().optional().describe('Maximum discovery depth for crawling (default: 5)'),
      limit: z.number().optional().describe('Maximum number of pages to crawl (default: 10000)'),
      allowExternalLinks: z.boolean().optional().describe('Allow crawling external links (default: false)'),
      deduplicateSimilarURLs: z.boolean().optional().describe('Remove similar URLs during crawl (default: true)')
    },
    async (params) => {
      try {
        const { action, ...otherParams } = params;

        // NOTE(review): enableWebSearch, searchTerm, sitemap, includeSubdomains,
        // ignoreQueryParameters and delay are read below but are not declared in
        // the schema above, so they presumably always take their fallback value.
        switch (action) {
          case 'scrape': {
            if (!otherParams.url) {
              return textResult('URL is required for scraping action.');
            }
            return textResult(placeholderReport(
              'Web Scraping Result',
              [
                `URL: ${otherParams.url}`,
                `Formats: ${(otherParams.formats || ['markdown']).join(', ')}`,
                `Main Content Only: ${otherParams.onlyMainContent !== false}`,
                `Mobile Viewport: ${otherParams.mobile || false}`,
                `Max Age: ${otherParams.maxAge || 172800000}ms`
              ],
              'This is a placeholder response. In a real implementation, this would call the Firecrawl scraping API to extract content from the specified URL.'
            ));
          }

          case 'search': {
            if (!otherParams.query) {
              return textResult('Query is required for search action.');
            }
            return textResult(placeholderReport(
              'Web Search Result',
              [
                `Query: ${otherParams.query}`,
                `Max Results: ${otherParams.maxResults || 5}`,
                `Formats: ${(otherParams.formats || ['markdown']).join(', ')}`,
                `Main Content Only: ${otherParams.onlyMainContent !== false}`
              ],
              'This is a placeholder response. In a real implementation, this would call the Firecrawl search API to find relevant web content.'
            ));
          }

          case 'extract': {
            if (!otherParams.urls || otherParams.urls.length === 0) {
              return textResult('URLs array is required for extract action.');
            }
            if (!otherParams.prompt) {
              return textResult('Prompt is required for extract action.');
            }
            return textResult(placeholderReport(
              'Content Extraction Result',
              [
                `URLs: ${otherParams.urls.join(', ')}`,
                `Prompt: ${otherParams.prompt}`,
                `Schema: ${otherParams.schema ? 'Provided' : 'None'}`,
                `Allow External Links: ${otherParams.allowExternalLinks || false}`,
                `Enable Web Search: ${otherParams.enableWebSearch || false}`
              ],
              'This is a placeholder response. In a real implementation, this would call the Firecrawl extraction API to extract structured information from the specified URLs.'
            ));
          }

          case 'map': {
            if (!otherParams.url) {
              return textResult('URL is required for map action.');
            }
            return textResult(placeholderReport(
              'Website Mapping Result',
              [
                `Starting URL: ${otherParams.url}`,
                `Search Term: ${otherParams.searchTerm || 'None'}`,
                `Sitemap Handling: ${otherParams.sitemap || 'include'}`,
                `Include Subdomains: ${otherParams.includeSubdomains || false}`,
                `Limit: ${otherParams.limit || 'None'}`,
                `Ignore Query Parameters: ${otherParams.ignoreQueryParameters !== false}`
              ],
              'This is a placeholder response. In a real implementation, this would call the Firecrawl mapping API to discover URLs on the specified website.'
            ));
          }

          case 'crawl': {
            if (!otherParams.url) {
              return textResult('URL is required for crawl action.');
            }
            return textResult(placeholderReport(
              'Website Crawling Result',
              [
                `Starting URL: ${otherParams.url}`,
                `Max Discovery Depth: ${otherParams.maxDiscoveryDepth || 5}`,
                `Limit: ${otherParams.limit || 10000}`,
                `Allow External Links: ${otherParams.allowExternalLinks || false}`,
                `Deduplicate Similar URLs: ${otherParams.deduplicateSimilarURLs !== false}`,
                `Delay: ${otherParams.delay || 'None'} seconds`
              ],
              'This is a placeholder response. In a real implementation, this would start a Firecrawl crawling job to extract content from multiple pages on the specified website.'
            ));
          }

          default:
            return textResult('Invalid action. Please use: scrape, search, extract, map, or crawl.');
        }
      } catch (error) {
        return textResult(`Error performing web research: ${describeError(error)}`);
      }
    }
  );
}

/**
 * Entry point: builds the MCP server, registers the five consolidated tools,
 * and connects the server to a stdio transport.
 */
async function main() {
  const mcpServer = new McpServer({
    name: 'scholarly-research-mcp',
    version: '2.0.0',
  });

  const pubmedAdapter = new PubMedAdapter();
  const googleScholarAdapter = new GoogleScholarAdapter();
  const arxivAdapter = new ArXivAdapter();
  // These three adapters are not referenced by any tool registered below. They
  // are still constructed because constructor side effects are not visible here.
  const unifiedSearchAdapter = new UnifiedSearchAdapter();
  const enhancedUnifiedSearchAdapter = new EnhancedUnifiedSearchAdapter();
  const preferenceAwareAdapter = new PreferenceAwareUnifiedSearchAdapter();
  const preferencesManager = UserPreferencesManager.getInstance();

  registerResearchSearch(mcpServer, pubmedAdapter, googleScholarAdapter, arxivAdapter);
  registerPaperAnalysis(mcpServer, pubmedAdapter, arxivAdapter);
  registerCitationManager(mcpServer, pubmedAdapter, arxivAdapter);
  registerResearchPreferences(mcpServer, preferencesManager);
  registerWebResearch(mcpServer);

  const transport = new StdioServerTransport();
  // Awaited so a connection failure rejects main() and reaches the catch below,
  // and so the start-up message is only logged once the connection succeeded.
  await mcpServer.connect(transport);
  // Logged via console.error (stderr) rather than console.log.
  console.error('Scholarly Research MCP Server (Consolidated) started');
}

main().catch(console.error);

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/aringadre76/mcp-for-research'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.