mcp-simple-arxiv

"""
MCP server for accessing arXiv papers.
"""

import sys

# The server exchanges messages with its client over stdin/stdout (see
# ``stdio_server`` in ``main``), so both streams are switched to UTF-8 first.
sys.stdout.reconfigure(encoding='utf-8')
sys.stdin.reconfigure(encoding='utf-8')

import asyncio
import json
import logging
from typing import Any, Sequence

from mcp.server import Server
import mcp.types as types
from mcp.server.stdio import stdio_server

from .arxiv_client import ArxivClient
from .update_taxonomy import load_taxonomy, update_taxonomy_file

logger = logging.getLogger(__name__)

# Bounds applied to the ``max_results`` argument of ``search_papers``; they
# mirror the "minimum"/"maximum"/default advertised in the tool's input schema.
_MIN_RESULTS = 1
_MAX_RESULTS = 50
_DEFAULT_RESULTS = 10

# Terminators that mark the end of a sentence in ``get_first_sentence``.
_SENTENCE_ENDINGS = ('. ', '! ', '? ')

# NOTE(review): the whitespace inside this description (and inside the other
# string literals in this module) is reproduced exactly as found; line breaks
# or indentation may have been lost upstream -- confirm against the project.
_SEARCH_DESCRIPTION = (
    "Search for papers on arXiv by title and abstract content. \n"
    "You can use advanced search syntax: "
    '- Search in title: ti:"search terms" '
    '- Search in abstract: abs:"search terms" '
    '- Search by author: au:"author name" '
    "- Combine terms with: AND, OR, ANDNOT "
    "- Filter by category: cat:cs.AI "
    "(use list_categories tool to see available categories) "
    "Examples: "
    '- "machine learning" (searches all fields) '
    '- ti:"neural networks" AND cat:cs.AI (title with category) '
    '- au:bengio AND ti:"deep learning" (author and title)'
)


def get_first_sentence(text: str, max_len: int = 200) -> str:
    """Extract the first sentence from ``text``, limiting its length.

    A sentence ends at the earliest occurrence of ``'. '``, ``'! '`` or
    ``'? '`` that starts before ``max_len``.

    Args:
        text: Text to shorten.
        max_len: Maximum number of characters to consider.

    Returns:
        The text up to and including the terminating punctuation mark. If no
        terminator starts before ``max_len``, the first ``max_len`` characters
        (right-stripped) followed by ``'...'`` when the text is longer than
        ``max_len``, otherwise ``text`` unchanged.
    """
    candidates = [
        pos
        for pos in (text.find(ending) for ending in _SENTENCE_ENDINGS)
        if pos != -1 and pos < max_len
    ]
    if candidates:
        # Use the earliest terminator of any kind, so that e.g.
        # "Wow! It works. ..." yields "Wow!" rather than "Wow! It works.".
        return text[:min(candidates) + 1]

    # No sentence ending found in range: fall back to a hard cut.
    if len(text) > max_len:
        return text[:max_len].rstrip() + '...'
    return text


app = Server("arxiv-server")
arxiv_client = ArxivClient()


@app.list_tools()
async def list_tools() -> list[types.Tool]:
    """List available tools for interacting with arXiv.

    Returns:
        The four tool definitions served here: ``search_papers``,
        ``get_paper_data``, ``list_categories`` and ``update_categories``.
    """
    search_papers = types.Tool(
        name="search_papers",
        description=_SEARCH_DESCRIPTION,
        inputSchema={
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query to match against paper titles and abstracts",
                },
                "max_results": {
                    "type": "number",
                    "description": "Maximum number of results to return (default: 10)",
                    "minimum": 1,
                    "maximum": 50,
                },
            },
            "required": ["query"],
        },
    )
    get_paper_data = types.Tool(
        name="get_paper_data",
        description="Get detailed information about a specific paper including abstract and available formats",
        inputSchema={
            "type": "object",
            "properties": {
                "paper_id": {
                    "type": "string",
                    "description": "arXiv paper ID (e.g., '2103.08220')",
                },
            },
            "required": ["paper_id"],
        },
    )
    list_categories = types.Tool(
        name="list_categories",
        description="List all available arXiv categories and how to use them in search",
        inputSchema={
            "type": "object",
            "properties": {
                "primary_category": {
                    "type": "string",
                    "description": "Optional: filter by primary category (e.g., 'cs' for Computer Science)",
                },
            },
        },
    )
    update_categories = types.Tool(
        name="update_categories",
        description="Update the stored category taxonomy by fetching the latest version from arxiv.org",
        inputSchema={
            "type": "object",
            "properties": {},
        },
    )
    return [search_papers, get_paper_data, list_categories, update_categories]


def _text(message: str) -> types.TextContent:
    """Wrap ``message`` in a plain text content item."""
    return types.TextContent(type="text", text=message)


def _error(message: str) -> types.TextContent:
    """Wrap ``message`` in a text content item flagged with ``isError=True``."""
    return types.TextContent(type="text", text=message, isError=True)


def _format_category_summary(paper: dict) -> str:
    """Describe a paper's primary and additional categories on one line.

    Empty parts are omitted, so a paper without a primary category does not
    produce a dangling leading comma.
    """
    parts = []
    if paper['primary_category']:
        parts.append(f"Primary: {paper['primary_category']}")
    if paper['categories']:
        parts.append(f"Additional: {', '.join(paper['categories'])}")
    return ", ".join(parts)


def _format_search_results(papers) -> str:
    """Render search results as a numbered, human-readable listing."""
    chunks = ["Search Results:\n\n"]
    for index, paper in enumerate(papers, 1):
        chunks.append(f"{index}. {paper['title']}\n")
        chunks.append(f" Authors: {', '.join(paper['authors'])}\n")
        chunks.append(f" ID: {paper['id']}\n")
        chunks.append(f" Categories: {_format_category_summary(paper)}")
        chunks.append(f"\n Published: {paper['published']}\n")
        # Only the first sentence of the abstract is shown as a preview.
        chunks.append(f" Preview: {get_first_sentence(paper['summary'])}\n")
        chunks.append("\n")
    return "".join(chunks)


def _format_paper_details(paper: dict) -> str:
    """Render one paper as metadata, abstract and access-option sections."""
    chunks = [f"Title: {paper['title']}\n\n"]

    # Metadata section
    chunks.append("Metadata:\n")
    chunks.append(f"- Authors: {', '.join(paper['authors'])}\n")
    chunks.append(f"- Published: {paper['published']}\n")
    chunks.append(f"- Last Updated: {paper['updated']}\n")
    chunks.append(f"- Categories: {_format_category_summary(paper)}\n")
    if paper['doi']:
        chunks.append(f"- DOI: {paper['doi']}\n")
    if paper["journal_ref"]:
        chunks.append(f"- Journal Reference: {paper['journal_ref']}\n")

    # Abstract section
    chunks.append("\nAbstract:\n")
    chunks.append(paper["summary"])
    chunks.append("\n")

    # Access options section
    chunks.append("\nAccess Options:\n")
    chunks.append("- Abstract page: " + paper["abstract_url"] + "\n")
    if paper["html_url"]:  # the HTML version is listed only when present
        chunks.append("- Full text HTML version: " + paper["html_url"] + "\n")
    chunks.append("- PDF version: " + paper["pdf_url"] + "\n")

    # Additional information section. Only a truthiness test is needed: the
    # previous extra `"code" in comment.lower()` check could never add a case
    # and raised AttributeError whenever the comment was None.
    if paper["comment"]:
        chunks.append("\nAdditional Information:\n")
        chunks.append("- Comment: " + paper["comment"] + "\n")

    return "".join(chunks)


def _format_category_listing(taxonomy: dict, primary_filter=None) -> str:
    """Render the taxonomy, optionally restricted to one primary category."""
    chunks = ["arXiv Categories:\n\n"]
    for primary, data in taxonomy.items():
        if primary_filter and primary != primary_filter:
            continue
        chunks.append(f"{primary}: {data['name']}\n")
        for code, desc in data['subcategories'].items():
            chunks.append(f" {primary}.{code}: {desc}\n")
        chunks.append("\n")

    chunks.append("\nUsage in search:\n")
    chunks.append('- Search in specific category: cat:cs.AI\n')
    chunks.append('- Combine with other terms: "neural networks" AND cat:cs.AI\n')
    chunks.append('- Multiple categories: (cat:cs.AI OR cat:cs.LG)\n')
    chunks.append('\nNote: If categories seem outdated, use the update_categories tool to refresh them.\n')
    return "".join(chunks)


def _format_taxonomy_update(taxonomy: dict) -> str:
    """Summarise a freshly updated taxonomy: one line per primary category."""
    chunks = [
        "Successfully updated category taxonomy.\n\n",
        f"Found {len(taxonomy)} primary categories:\n",
    ]
    for primary, data in taxonomy.items():
        chunks.append(
            f"- {primary}: {data['name']} ({len(data['subcategories'])} subcategories)\n"
        )
    return "".join(chunks)


@app.call_tool()
async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
    """Handle tool calls for arXiv operations.

    Args:
        name: Tool to run: ``search_papers``, ``get_paper_data``,
            ``list_categories`` or ``update_categories``.
        arguments: Tool arguments, as described by the schemas returned from
            ``list_tools``.

    Returns:
        A single-item list holding the formatted text result. Unknown tool
        names and any exception raised while handling the call are reported
        as a text item flagged with ``isError=True`` instead of propagating.
    """
    try:
        if name == "search_papers":
            query = arguments["query"]
            requested = int(arguments.get("max_results", _DEFAULT_RESULTS))
            # Clamp into the range the input schema advertises.
            max_results = max(_MIN_RESULTS, min(requested, _MAX_RESULTS))
            papers = await arxiv_client.search(query, max_results)
            return [_text(_format_search_results(papers))]

        if name == "get_paper_data":
            paper = await arxiv_client.get_paper(arguments["paper_id"])
            return [_text(_format_paper_details(paper))]

        if name == "list_categories":
            try:
                taxonomy = load_taxonomy()
            except Exception as e:
                logger.error("Error loading taxonomy: %s", e)
                return [_text("Error loading category taxonomy. Try using update_categories tool to refresh it.")]
            primary_filter = arguments.get("primary_category")
            return [_text(_format_category_listing(taxonomy, primary_filter))]

        if name == "update_categories":
            try:
                taxonomy = update_taxonomy_file()
                return [_text(_format_taxonomy_update(taxonomy))]
            except Exception as e:
                logger.error("Error updating taxonomy: %s", e)
                return [_error(f"Error updating taxonomy: {str(e)}")]

        return [_error(f"Unknown tool: {name}")]

    except Exception as e:
        # Top-level boundary: log the traceback and report the failure to the
        # client rather than letting the exception escape the handler.
        logger.exception("Error handling tool call")
        return [_error(f"Error: {str(e)}")]


async def main():
    """Run the MCP server over stdio."""
    async with stdio_server() as (read_stream, write_stream):
        await app.run(
            read_stream,
            write_stream,
            app.create_initialization_options()
        )


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    asyncio.run(main())