arXiv MCP Server

Overview Schema Related Servers Score Discussions

arxiv-mcp-server
src

index.ts

index.ts•18.8 KiB

#!/usr/bin/env node import { Server } from '@modelcontextprotocol/sdk/server/index.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import { CallToolRequestSchema, ErrorCode, ListToolsRequestSchema, McpError, } from '@modelcontextprotocol/sdk/types.js'; import axios from 'axios'; import fs from 'fs-extra'; import * as path from 'path'; // pdf-parse will be imported dynamically where needed // Base URL for arXiv API const ARXIV_API_BASE_URL = 'http://export.arxiv.org/api/query'; // Directory for temporary PDF storage const TEMP_PDF_DIR = path.join(process.cwd(), 'temp', 'pdfs'); // Interface for search parameters interface SearchParams { search_query?: string; id_list?: string; start?: number; max_results?: number; sortBy?: string; sortOrder?: string; } // Interface for paper search arguments interface SearchPapersArgs { query?: string; category?: string; author?: string; title?: string; abstract?: string; start?: number; max_results?: number; sort_by?: string; sort_order?: string; } // Interface for get paper arguments interface GetPaperArgs { paper_id: string; } // Interface for category search arguments interface SearchByCategoryArgs { category: string; start?: number; max_results?: number; sort_by?: string; sort_order?: string; } // Interface for get paper content arguments interface GetPaperContentArgs { paper_id: string; } export class ArxivServer { private server: Server; constructor() { this.server = new Server( { name: 'arxiv-mcp-server', version: '0.1.0', }, { capabilities: { tools: {}, }, } ); this.setupToolHandlers(); // Error handling this.server.onerror = (error) => console.error('[MCP Error]', error); process.on('SIGINT', async () => { await this.server.close(); process.exit(0); }); } /** * Expose private methods for testing purposes * This allows tests to access these methods without type casting */ // Property for testing purposes only public _testMethods = { searchPapers: this.searchPapers.bind(this), getPaper: this.getPaper.bind(this), searchByCategory: this.searchByCategory.bind(this), getPaperContent: this.getPaperContent.bind(this), queryArxiv: this.queryArxiv.bind(this), processArxivResponse: this.processArxivResponse.bind(this), downloadPdf: this.downloadPdf.bind(this), extractTextFromPdf: this.extractTextFromPdf.bind(this), }; private setupToolHandlers() { this.server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: [ { name: 'search_papers', description: 'Search for papers on arXiv by various criteria', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'General search query across all fields', }, category: { type: 'string', description: 'arXiv category (e.g., cs.AI, physics.optics)', }, author: { type: 'string', description: 'Author name', }, title: { type: 'string', description: 'Words in the title', }, abstract: { type: 'string', description: 'Words in the abstract', }, start: { type: 'number', description: 'Starting index for pagination (0-based)', }, max_results: { type: 'number', description: 'Maximum number of results to return (max 2000)', }, sort_by: { type: 'string', description: 'Sort by: relevance, lastUpdatedDate, submittedDate', enum: ['relevance', 'lastUpdatedDate', 'submittedDate'], }, sort_order: { type: 'string', description: 'Sort order: ascending or descending', enum: ['ascending', 'descending'], }, }, }, }, { name: 'get_paper', description: 'Get details about a specific paper by its arXiv ID', inputSchema: { type: 'object', properties: { paper_id: { type: 'string', description: 'arXiv paper ID (e.g., 2104.13478 or cs/0001001)', }, }, required: ['paper_id'], }, }, { name: 'search_by_category', description: 'Search for papers in a specific arXiv category', inputSchema: { type: 'object', properties: { category: { type: 'string', description: 'arXiv category (e.g., cs.AI, physics.optics)', }, start: { type: 'number', description: 'Starting index for pagination (0-based)', }, max_results: { type: 'number', description: 'Maximum number of results to return (max 2000)', }, sort_by: { type: 'string', description: 'Sort by: relevance, lastUpdatedDate, submittedDate', enum: ['relevance', 'lastUpdatedDate', 'submittedDate'], }, sort_order: { type: 'string', description: 'Sort order: ascending or descending', enum: ['ascending', 'descending'], }, }, required: ['category'], }, }, { name: 'get_paper_content', description: 'Get the full text content of a paper by downloading and extracting text from its PDF', inputSchema: { type: 'object', properties: { paper_id: { type: 'string', description: 'arXiv paper ID (e.g., 2104.13478 or cs/0001001)', }, }, required: ['paper_id'], }, }, ], })); this.server.setRequestHandler(CallToolRequestSchema, async (request) => { try { switch (request.params.name) { case 'search_papers': return await this.searchPapers(request.params.arguments as unknown as SearchPapersArgs); case 'get_paper': if (!request.params.arguments || typeof request.params.arguments.paper_id !== 'string') { throw new McpError( ErrorCode.InvalidParams, 'Missing or invalid paper_id parameter' ); } return await this.getPaper(request.params.arguments as unknown as GetPaperArgs); case 'search_by_category': if (!request.params.arguments || typeof request.params.arguments.category !== 'string') { throw new McpError( ErrorCode.InvalidParams, 'Missing or invalid category parameter' ); } return await this.searchByCategory(request.params.arguments as unknown as SearchByCategoryArgs); case 'get_paper_content': if (!request.params.arguments || typeof request.params.arguments.paper_id !== 'string') { throw new McpError( ErrorCode.InvalidParams, 'Missing or invalid paper_id parameter' ); } return await this.getPaperContent(request.params.arguments as unknown as GetPaperContentArgs); default: throw new McpError( ErrorCode.MethodNotFound, `Unknown tool: ${request.params.name}` ); } } catch (error) { if (axios.isAxiosError(error)) { return { content: [ { type: 'text', text: `arXiv API error: ${error.response?.data || error.message}`, }, ], isError: true, }; } throw error; } }); } public async searchPapers(args: SearchPapersArgs) { const searchParams: SearchParams = {}; // Build search query const searchTerms: string[] = []; if (args.query) { searchTerms.push(`all:${args.query}`); } if (args.category) { searchTerms.push(`cat:${args.category}`); } if (args.author) { searchTerms.push(`au:${args.author}`); } if (args.title) { searchTerms.push(`ti:${args.title}`); } if (args.abstract) { searchTerms.push(`abs:${args.abstract}`); } if (searchTerms.length > 0) { searchParams.search_query = searchTerms.join('+AND+'); } // Add pagination if (args.start !== undefined) { searchParams.start = args.start; } if (args.max_results !== undefined) { searchParams.max_results = Math.min(args.max_results, 2000); // API limit } else { searchParams.max_results = 10; // Default } // Add sorting if (args.sort_by) { searchParams.sortBy = args.sort_by; } if (args.sort_order) { searchParams.sortOrder = args.sort_order; } const response = await this.queryArxiv(searchParams); return { content: [ { type: 'text', text: JSON.stringify(response, null, 2), }, ], }; } private async getPaper(args: GetPaperArgs) { const searchParams: SearchParams = { id_list: args.paper_id, }; const response = await this.queryArxiv(searchParams); return { content: [ { type: 'text', text: JSON.stringify(response, null, 2), }, ], }; } private async searchByCategory(args: SearchByCategoryArgs) { const searchParams: SearchParams = { search_query: `cat:${args.category}`, }; // Add pagination if (args.start !== undefined) { searchParams.start = args.start; } if (args.max_results !== undefined) { searchParams.max_results = Math.min(args.max_results, 2000); // API limit } else { searchParams.max_results = 10; // Default } // Add sorting if (args.sort_by) { searchParams.sortBy = args.sort_by; } if (args.sort_order) { searchParams.sortOrder = args.sort_order; } const response = await this.queryArxiv(searchParams); return { content: [ { type: 'text', text: JSON.stringify(response, null, 2), }, ], }; } private async queryArxiv(params: SearchParams) { try { const response = await axios.get(ARXIV_API_BASE_URL, { params }); // Parse the XML response const xmlData = response.data; // Extract and process the data // For simplicity, we're returning the raw XML data // In a production environment, you would parse this XML into a more usable format return this.processArxivResponse(xmlData); } catch (error) { console.error('Error querying arXiv API:', error); throw error; } } private processArxivResponse(xmlData: string) { try { // Basic XML parsing to extract paper information const papers: any[] = []; // Extract feed information const titleMatch = xmlData.match(/<title[^>]*>(.*?)<\/title>/); const totalResultsMatch = xmlData.match(/<opensearch:totalResults[^>]*>(\d+)<\/opensearch:totalResults>/); const startIndexMatch = xmlData.match(/<opensearch:startIndex[^>]*>(\d+)<\/opensearch:startIndex>/); const itemsPerPageMatch = xmlData.match(/<opensearch:itemsPerPage[^>]*>(\d+)<\/opensearch:itemsPerPage>/); // Extract entry elements const entryRegex = /<entry>(.*?)<\/entry>/gs; let entryMatch; while ((entryMatch = entryRegex.exec(xmlData)) !== null) { const entry = entryMatch[1]; // Extract paper details const idMatch = entry.match(/<id[^>]*>(.*?)<\/id>/); const titleMatch = entry.match(/<title[^>]*>(.*?)<\/title>/); const summaryMatch = entry.match(/<summary[^>]*>(.*?)<\/summary>/); const publishedMatch = entry.match(/<published[^>]*>(.*?)<\/published>/); const updatedMatch = entry.match(/<updated[^>]*>(.*?)<\/updated>/); // Extract authors const authors: string[] = []; const authorRegex = /<author[^>]*>[\s\S]*?<name[^>]*>(.*?)<\/name>[\s\S]*?<\/author>/g; let authorMatch; while ((authorMatch = authorRegex.exec(entry)) !== null) { authors.push(authorMatch[1].trim()); } // Extract categories const categories: string[] = []; const categoryRegex = /<category[^>]*term="([^"]+)"/g; let categoryMatch; while ((categoryMatch = categoryRegex.exec(entry)) !== null) { categories.push(categoryMatch[1]); } // Extract links const links: any[] = []; const linkRegex = /<link[^>]*href="([^"]+)"[^>]*(?:rel="([^"]+)")?[^>]*(?:type="([^"]+)")?[^>]*\/>/g; let linkMatch; while ((linkMatch = linkRegex.exec(entry)) !== null) { links.push({ href: linkMatch[1], rel: linkMatch[2] || 'alternate', type: linkMatch[3] || 'text/html' }); } const paper = { id: idMatch ? idMatch[1].trim() : '', title: titleMatch ? titleMatch[1].trim().replace(/\s+/g, ' ') : '', summary: summaryMatch ? summaryMatch[1].trim().replace(/\s+/g, ' ') : '', authors: authors, published: publishedMatch ? publishedMatch[1].trim() : '', updated: updatedMatch ? updatedMatch[1].trim() : '', categories: categories, links: links, // Extract arXiv ID from the main ID arxiv_id: idMatch ? idMatch[1].replace('http://arxiv.org/abs/', '') : '' }; papers.push(paper); } return { feed_title: titleMatch ? titleMatch[1].trim() : '', total_results: totalResultsMatch ? parseInt(totalResultsMatch[1]) : 0, start_index: startIndexMatch ? parseInt(startIndexMatch[1]) : 0, items_per_page: itemsPerPageMatch ? parseInt(itemsPerPageMatch[1]) : 0, papers: papers }; } catch (error) { console.error('Error parsing arXiv XML response:', error); return { error: 'Failed to parse arXiv response', raw_response: xmlData.substring(0, 1000) + '...' // Truncated for safety }; } } /** * Downloads a PDF file from a URL and saves it to the temporary directory * @param url URL of the PDF to download * @param paperId arXiv paper ID (used for filename) * @returns Path to the downloaded PDF file */ private async downloadPdf(url: string, paperId: string): Promise<string> { try { // Ensure temp directory exists await fs.ensureDir(TEMP_PDF_DIR); // Create a unique filename based on the paper ID const sanitizedPaperId = paperId.replace(/\//g, '_'); const pdfPath = path.join(TEMP_PDF_DIR, `${sanitizedPaperId}.pdf`); // Check if we already have this PDF cached if (await fs.pathExists(pdfPath)) { console.error(`Using cached PDF for ${paperId}`); return pdfPath; } console.error(`Downloading PDF for ${paperId} from ${url}`); // Download the PDF with proper headers // Note: Using responseType 'arraybuffer' to handle binary data const response = await axios.get(url, { responseType: 'arraybuffer', headers: { 'User-Agent': 'arXiv-MCP-Server/0.1.0 (https://github.com/your-username/arxiv-mcp-server)', }, // Add a timeout to prevent hanging on large files timeout: 30000, }); // Save the PDF to disk await fs.outputFile(pdfPath, response.data); return pdfPath; } catch (error) { console.error('Error downloading PDF:', error); throw new Error(`Failed to download PDF: ${error instanceof Error ? error.message : String(error)}`); } } /** * Extracts text content from a PDF file * @param pdfPath Path to the PDF file * @returns Extracted text content */ private async extractTextFromPdf(pdfPath: string): Promise<string> { try { // Read the PDF file const dataBuffer = await fs.readFile(pdfPath); // Dynamically import pdf-parse const pdfParse = (await import('pdf-parse')).default; // Parse the PDF const data = await pdfParse(dataBuffer); // Return the text content return data.text; } catch (error) { console.error('Error extracting text from PDF:', error); throw new Error(`Failed to extract text from PDF: ${error instanceof Error ? error.message : String(error)}`); } } /** * Gets the full text content of a paper by downloading and extracting text from its PDF * @param args Object containing paper_id * @returns Object containing the extracted text content */ private async getPaperContent(args: GetPaperContentArgs) { try { // Construct the PDF URL directly // arXiv PDF URLs follow the pattern: https://arxiv.org/pdf/{paper_id}.pdf const pdfUrl = `https://arxiv.org/pdf/${args.paper_id}.pdf`; // Download the PDF const pdfPath = await this.downloadPdf(pdfUrl, args.paper_id); // Extract text from the PDF const textContent = await this.extractTextFromPdf(pdfPath); // Clean up the text (remove excessive whitespace, normalize line breaks) const cleanedText = textContent .replace(/\s+/g, ' ') .replace(/(\r\n|\n|\r)/gm, '\n') .trim(); // Return the extracted text return { content: [ { type: 'text', text: cleanedText, }, ], }; } catch (error) { console.error('Error in getPaperContent:', error); if (axios.isAxiosError(error)) { return { content: [ { type: 'text', text: `Error retrieving paper content: ${error.response?.data || error.message}`, }, ], isError: true, }; } return { content: [ { type: 'text', text: `Error processing paper content: ${error instanceof Error ? error.message : String(error)}`, }, ], isError: true, }; } } async run() { const transport = new StdioServerTransport(); await this.server.connect(transport); console.error('arXiv MCP server running on stdio'); } } const server = new ArxivServer(); server.run().catch(console.error);

Loading blob content...

Implementation Reference

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Mnehmos/arxiv-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

index.ts•18.8 KiB