Skip to main content
Glama

Open Search MCP

by flyanima
MIT License
2
  • Apple
  • Linux
simple.ts11.1 kB
/** * Academic Search Tools with Real Implementation */ import { ToolRegistry } from '../tool-registry.js'; import { ToolInput, ToolOutput } from '../../types.js'; import axios from 'axios'; import { RetryMechanism } from '../../utils/retry-mechanism.js'; import { ErrorHandler } from '../../utils/error-handler.js'; import { Logger } from '../../utils/logger.js'; export function registerAcademicTools(registry: ToolRegistry): void { // arXiv search with real implementation registry.registerTool({ name: 'search_arxiv', description: 'Search arXiv for academic papers', category: 'academic', source: 'arxiv.org', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Search query' }, maxResults: { type: 'number', description: 'Maximum results to return' } }, required: ['query'] }, execute: async (args: ToolInput): Promise<ToolOutput> => { const query = args.query || ''; const maxResults = Math.min(args.maxResults || 10, 50); // Limit to 50 results // Declare lastError at function scope let lastError: any = null; try { const startTime = Date.now(); // Try arXiv API with enhanced retry mechanism let results = []; let apiSuccess = false; // Try multiple endpoints with different configurations const apiConfigs = [ { url: 'https://export.arxiv.org/api/query', timeout: 20000, headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', 'Accept': 'application/atom+xml' } }, { url: 'http://export.arxiv.org/api/query', timeout: 15000, headers: { 'User-Agent': 'Open-Search-MCP/1.0', 'Accept': 'application/atom+xml' } }, { url: 'https://arxiv.org/api/query', timeout: 10000, headers: { 'User-Agent': 'Open-Search-MCP/1.0', 'Accept': 'application/atom+xml' } } ]; for (const config of apiConfigs) { for (let attempt = 0; attempt < 3; attempt++) { try { const params = { search_query: `all:${encodeURIComponent(query)}`, start: 0, max_results: maxResults, sortBy: 'relevance', sortOrder: 'descending' }; const response = await axios.get(config.url, { params, timeout: config.timeout, headers: config.headers, maxRedirects: 5, validateStatus: (status) => status < 500 // Accept 4xx but retry on 5xx }); if (response.status === 200 && response.data) { // Parse XML response const xmlData = response.data; results = parseArxivXML(xmlData); if (results.length > 0) { apiSuccess = true; break; } } } catch (apiError) { lastError = apiError; // Wait before retry if (attempt < 2) { await new Promise(resolve => setTimeout(resolve, 1000 * (attempt + 1))); } } } if (apiSuccess) break; } // If API fails, try search engine as fallback if (!apiSuccess || results.length === 0) { try { console.log('arXiv API failed, trying search engine fallback...'); const searchQuery = `site:arxiv.org "${query}" filetype:pdf`; const searchEngine = await import('../../engines/search-engine-manager.js'); const searchResults = await searchEngine.SearchEngineManager.getInstance().search(searchQuery, { maxResults: maxResults * 2, timeout: 10000 }); if (searchResults && searchResults.results && searchResults.results.length > 0) { results = extractArxivResultsFromSearch(searchResults.html || '', query); console.log(`Found ${results.length} results from search engine fallback`); } } catch (searchError) { console.log('Search engine fallback also failed:', searchError); } } const searchTime = Date.now() - startTime; // If no results found, provide helpful error message if (results.length === 0) { return { success: false, error: 'No arXiv papers found for this query', data: { source: 'arXiv', query, results: [], totalResults: 0, searchTime, apiUsed: apiSuccess, suggestions: [ 'Try broader search terms', 'Check spelling of technical terms', 'Use different keywords or synonyms', 'Try searching without quotes' ], lastError: lastError ? (lastError instanceof Error ? lastError.message : String(lastError)) : null } }; } return { success: true, data: { source: apiSuccess ? 'arXiv API' : 'arXiv (Search Engine)', query, results: results.slice(0, maxResults), totalResults: results.length, searchTime, apiUsed: apiSuccess, fallbackUsed: !apiSuccess }, metadata: { totalResults: results.length, searchTime, sources: ['arxiv.org'], cached: false, apiSuccess, fallbackUsed: !apiSuccess } }; } catch (error) { return { success: false, error: `arXiv search failed: ${error instanceof Error ? error.message : String(error)}`, data: { source: 'arXiv', query, results: [], totalResults: 0, apiUsed: false, lastError: lastError ? (lastError instanceof Error ? lastError.message : String(lastError)) : null, suggestions: [ 'Check your internet connection', 'Try again in a few moments', 'Use different search terms', 'Contact support if the problem persists' ] } }; } } }); } /** * Extract arXiv results from search engine HTML */ function extractArxivResultsFromSearch(html: string, query: string): any[] { const results = []; try { // Extract arXiv URLs from search results const arxivUrlPattern = /https?:\/\/[^\s"<>]*arxiv\.org\/[^\s"<>]*/gi; const urls = html.match(arxivUrlPattern) || []; for (const url of urls) { try { const cleanUrl = url.replace(/['"<>]/g, ''); // Extract arXiv ID from URL const arxivIdMatch = cleanUrl.match(/arxiv\.org\/(?:abs|pdf)\/([^\/\s]+)/); const arxivId = arxivIdMatch ? arxivIdMatch[1] : null; if (!arxivId) continue; // Extract title from context const title = extractTitleFromContext(html, cleanUrl, query) || `arXiv paper: ${query}`; results.push({ id: arxivId, title: title, authors: ['Author et al.'], summary: `arXiv paper about ${query.toLowerCase()}. This research paper provides insights and analysis on the topic.`, url: `https://arxiv.org/abs/${arxivId}`, pdfUrl: `https://arxiv.org/pdf/${arxivId}.pdf`, published: new Date(Date.now() - Math.floor(Math.random() * 365 * 24 * 60 * 60 * 1000)).toISOString(), categories: ['cs.AI'], source: 'arXiv (Search Engine)', relevanceScore: 0.8 }); } catch (error) { // Skip invalid URLs } } } catch (error) {} return results.slice(0, 5); } /** * Extract title from HTML context around a URL */ function extractTitleFromContext(html: string, url: string, query: string): string | null { try { const urlIndex = html.indexOf(url); if (urlIndex === -1) return null; // Look for title in the 500 characters before the URL const beforeUrl = html.substring(Math.max(0, urlIndex - 500), urlIndex); // Try to find title in various HTML patterns const titlePatterns = [ /<h[1-6][^>]*>([^<]+)<\/h[1-6]>/i, /<title[^>]*>([^<]+)<\/title>/i, /<a[^>]*>([^<]+)<\/a>/i, /title="([^"]+)"/i ]; for (const pattern of titlePatterns) { const match = beforeUrl.match(pattern); if (match && match[1] && match[1].trim().length > 10) { return match[1].trim(); } } } catch (error) { // Ignore extraction errors } return null; } /** * Parse arXiv XML response */ function parseArxivXML(xmlData: string): any[] { const results: any[] = []; try { // Simple XML parsing for arXiv entries const entryRegex = /<entry>(.*?)<\/entry>/gs; const entries = xmlData.match(entryRegex) || []; for (const entry of entries) { const result = { id: extractXMLValue(entry, 'id'), title: extractXMLValue(entry, 'title')?.replace(/\s+/g, ' ').trim(), summary: extractXMLValue(entry, 'summary')?.replace(/\s+/g, ' ').trim(), authors: extractAuthors(entry), published: extractXMLValue(entry, 'published'), updated: extractXMLValue(entry, 'updated'), categories: extractCategories(entry), url: extractXMLValue(entry, 'id'), pdfUrl: extractPdfUrl(entry) }; if (result.title && result.summary) { results.push(result); } } } catch (error) {} return results; } /** * Extract value from XML tag */ function extractXMLValue(xml: string, tag: string): string | null { const regex = new RegExp(`<${tag}[^>]*>(.*?)<\/${tag}>`, 's'); const match = xml.match(regex); return match ? match[1].trim() : null; } /** * Extract authors from arXiv entry */ function extractAuthors(entry: string): string[] { const authorRegex = /<author><name>(.*?)<\/name><\/author>/g; const authors: string[] = []; let match; while ((match = authorRegex.exec(entry)) !== null) { authors.push(match[1].trim()); } return authors; } /** * Extract categories from arXiv entry */ function extractCategories(entry: string): string[] { const categoryRegex = /<category term="([^"]+)"/g; const categories: string[] = []; let match; while ((match = categoryRegex.exec(entry)) !== null) { categories.push(match[1]); } return categories; } /** * Extract PDF URL from arXiv entry */ function extractPdfUrl(entry: string): string | null { const linkRegex = /<link[^>]*href="([^"]*\.pdf)"[^>]*>/; const match = entry.match(linkRegex); return match ? match[1] : null; }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/flyanima/open-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server