mcp-simple-arxiv
by andybrandt
- mcp-simple-arxiv
- mcp_simple_arxiv
"""
MCP server for accessing arXiv papers.
"""
import sys
# Force UTF-8 on the standard streams at import time, before anything else
# touches them. NOTE(review): presumably needed because the server talks to
# its client over stdin/stdout (see stdio_server below) and the platform
# default encoding may not be UTF-8 — confirm.
sys.stdout.reconfigure(encoding='utf-8')
sys.stdin.reconfigure(encoding='utf-8')
import asyncio
import json
import logging
from typing import Any, Sequence
from mcp.server import Server
import mcp.types as types
from mcp.server.stdio import stdio_server
from .arxiv_client import ArxivClient
from .update_taxonomy import load_taxonomy, update_taxonomy_file
logger = logging.getLogger(__name__)
def get_first_sentence(text: str, max_len: int = 200) -> str:
    """Extract the first sentence from text, limiting its length.

    A sentence is considered to end at the earliest occurrence of '. ',
    '! ' or '? '. The terminator itself is kept; the following space is not.

    Args:
        text: The text to take a preview from.
        max_len: A sentence ending is only honoured if it occurs before this
            index; otherwise the text is truncated to this many characters.

    Returns:
        The first sentence if one ends before ``max_len``; otherwise the
        first ``max_len`` characters (right-stripped) followed by '...' when
        the text is longer than ``max_len``; otherwise the text unchanged.
    """
    # Collect the position of each kind of sentence ending, then take the
    # earliest one. Checking the terminators one after another and returning
    # on the first hit would prefer '. ' even when a '! ' or '? ' ends the
    # sentence earlier in the text.
    hits = [
        pos
        for pos in (text.find(end) for end in ('. ', '! ', '? '))
        if pos != -1
    ]
    if hits:
        earliest = min(hits)
        if earliest < max_len:
            return text[:earliest + 1]

    # No usable sentence ending: fall back to a hard length limit.
    if len(text) > max_len:
        return text[:max_len].rstrip() + '...'
    return text
# Module-level singletons shared by the tool handlers below:
# the MCP server instance (registered under the name "arxiv-server") ...
app = Server("arxiv-server")
# ... and the client object the handlers use for search() / get_paper() calls.
arxiv_client = ArxivClient()
@app.list_tools()
async def list_tools() -> list[types.Tool]:
    """Describe the tools this server exposes.

    Returns:
        One ``types.Tool`` per supported operation, in this order:
        search_papers, get_paper_data, list_categories, update_categories.
        Each carries a human-readable description and a JSON-schema style
        ``inputSchema`` for its arguments.
    """
    # The continuation lines of this literal are intentionally flush-left:
    # any indentation here would become part of the description text.
    search_description = """Search for papers on arXiv by title and abstract content.
You can use advanced search syntax:
- Search in title: ti:"search terms"
- Search in abstract: abs:"search terms"
- Search by author: au:"author name"
- Combine terms with: AND, OR, ANDNOT
- Filter by category: cat:cs.AI (use list_categories tool to see available categories)
Examples:
- "machine learning" (searches all fields)
- ti:"neural networks" AND cat:cs.AI (title with category)
- au:bengio AND ti:"deep learning" (author and title)"""

    search_schema = {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search query to match against paper titles and abstracts"
            },
            "max_results": {
                "type": "number",
                "description": "Maximum number of results to return (default: 10)",
                "minimum": 1,
                "maximum": 50
            }
        },
        "required": ["query"]
    }

    paper_schema = {
        "type": "object",
        "properties": {
            "paper_id": {
                "type": "string",
                "description": "arXiv paper ID (e.g., '2103.08220')"
            }
        },
        "required": ["paper_id"]
    }

    # primary_category is optional, so this schema has no "required" list.
    categories_schema = {
        "type": "object",
        "properties": {
            "primary_category": {
                "type": "string",
                "description": "Optional: filter by primary category (e.g., 'cs' for Computer Science)"
            }
        }
    }

    # update_categories takes no arguments at all.
    update_schema = {
        "type": "object",
        "properties": {},
    }

    search_tool = types.Tool(
        name="search_papers",
        description=search_description,
        inputSchema=search_schema,
    )
    paper_tool = types.Tool(
        name="get_paper_data",
        description="Get detailed information about a specific paper including abstract and available formats",
        inputSchema=paper_schema,
    )
    categories_tool = types.Tool(
        name="list_categories",
        description="List all available arXiv categories and how to use them in search",
        inputSchema=categories_schema,
    )
    update_tool = types.Tool(
        name="update_categories",
        description="Update the stored category taxonomy by fetching the latest version from arxiv.org",
        inputSchema=update_schema,
    )
    return [search_tool, paper_tool, categories_tool, update_tool]
def _format_categories(paper: dict) -> str:
    """Build the 'Primary: ..., Additional: ...' category summary for a paper."""
    parts = []
    if paper['primary_category']:
        parts.append(f"Primary: {paper['primary_category']}")
    if paper['categories']:
        parts.append(f"Additional: {', '.join(paper['categories'])}")
    # Joining avoids a dangling leading ", " when there is no primary category.
    return ", ".join(parts)


def _format_search_results(papers) -> str:
    """Render a numbered, human-readable listing of search results."""
    chunks = ["Search Results:\n\n"]
    for i, paper in enumerate(papers, 1):
        chunks.append(f"{i}. {paper['title']}\n")
        chunks.append(f" Authors: {', '.join(paper['authors'])}\n")
        chunks.append(f" ID: {paper['id']}\n")
        chunks.append(f" Categories: {_format_categories(paper)}")
        chunks.append(f"\n Published: {paper['published']}\n")
        # Only the first sentence of the abstract is shown as a preview.
        chunks.append(f" Preview: {get_first_sentence(paper['summary'])}\n")
        chunks.append("\n")
    return "".join(chunks)


def _format_paper_details(paper: dict) -> str:
    """Render one paper's metadata, abstract and access links as text."""
    chunks = [f"Title: {paper['title']}\n\n"]

    # Metadata section
    chunks.append("Metadata:\n")
    chunks.append(f"- Authors: {', '.join(paper['authors'])}\n")
    chunks.append(f"- Published: {paper['published']}\n")
    chunks.append(f"- Last Updated: {paper['updated']}\n")
    chunks.append(f"- Categories: {_format_categories(paper)}\n")
    if paper['doi']:
        chunks.append(f"- DOI: {paper['doi']}\n")
    if paper["journal_ref"]:
        chunks.append(f"- Journal Reference: {paper['journal_ref']}\n")

    # Abstract section
    chunks.append("\nAbstract:\n")
    chunks.append(paper["summary"])
    chunks.append("\n")

    # Access options section
    chunks.append("\nAccess Options:\n")
    chunks.append("- Abstract page: " + paper["abstract_url"] + "\n")
    if paper["html_url"]:  # HTML version is listed only when available
        chunks.append("- Full text HTML version: " + paper["html_url"] + "\n")
    chunks.append("- PDF version: " + paper["pdf_url"] + "\n")

    # Additional information section. A plain truthiness check is used so a
    # missing (None or empty) comment is skipped instead of being
    # dereferenced with .lower(), which raised AttributeError on None.
    comment = paper["comment"]
    if comment:
        chunks.append("\nAdditional Information:\n")
        chunks.append("- Comment: " + comment + "\n")

    return "".join(chunks)


def _format_category_list(taxonomy: dict, primary_filter) -> str:
    """Render the taxonomy (optionally one primary category) plus usage hints."""
    chunks = ["arXiv Categories:\n\n"]
    for primary, data in taxonomy.items():
        if primary_filter and primary != primary_filter:
            continue
        chunks.append(f"{primary}: {data['name']}\n")
        for code, desc in data['subcategories'].items():
            chunks.append(f" {primary}.{code}: {desc}\n")
        chunks.append("\n")
    chunks.append("\nUsage in search:\n")
    chunks.append('- Search in specific category: cat:cs.AI\n')
    chunks.append('- Combine with other terms: "neural networks" AND cat:cs.AI\n')
    chunks.append('- Multiple categories: (cat:cs.AI OR cat:cs.LG)\n')
    chunks.append('\nNote: If categories seem outdated, use the update_categories tool to refresh them.\n')
    return "".join(chunks)


@app.call_tool()
async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
    """Handle tool calls for arXiv operations.

    Args:
        name: Tool to run: "search_papers", "get_paper_data",
            "list_categories" or "update_categories".
        arguments: Tool arguments. "search_papers" reads "query" and the
            optional "max_results"; "get_paper_data" reads "paper_id";
            "list_categories" reads the optional "primary_category".

    Returns:
        A single-element list with the textual result. Unknown tool names
        and any exception raised while handling the call are reported as a
        text item (flagged with ``isError=True``) rather than propagated.
    """
    try:
        if name == "search_papers":
            query = arguments["query"]
            # Clamp to the 1..50 range advertised in the tool schema; the
            # lower bound guards against zero or negative requests.
            max_results = max(1, min(int(arguments.get("max_results", 10)), 50))
            papers = await arxiv_client.search(query, max_results)
            return [types.TextContent(type="text", text=_format_search_results(papers))]

        elif name == "get_paper_data":
            paper_id = arguments["paper_id"]
            paper = await arxiv_client.get_paper(paper_id)
            return [types.TextContent(type="text", text=_format_paper_details(paper))]

        elif name == "list_categories":
            try:
                taxonomy = load_taxonomy()
            except Exception as e:
                logger.error("Error loading taxonomy: %s", e)
                return [types.TextContent(type="text", text="Error loading category taxonomy. Try using update_categories tool to refresh it.")]
            primary_filter = arguments.get("primary_category")
            return [types.TextContent(
                type="text",
                text=_format_category_list(taxonomy, primary_filter),
            )]

        elif name == "update_categories":
            try:
                taxonomy = update_taxonomy_file()
                chunks = ["Successfully updated category taxonomy.\n\n"]
                chunks.append(f"Found {len(taxonomy)} primary categories:\n")
                for primary, data in taxonomy.items():
                    chunks.append(f"- {primary}: {data['name']} ({len(data['subcategories'])} subcategories)\n")
                return [types.TextContent(type="text", text="".join(chunks))]
            except Exception as e:
                logger.error("Error updating taxonomy: %s", e)
                return [types.TextContent(
                    type="text",
                    text=f"Error updating taxonomy: {str(e)}",
                    isError=True
                )]

        else:
            return [types.TextContent(
                type="text",
                text=f"Unknown tool: {name}",
                isError=True
            )]
    except Exception as e:
        # Top-level boundary: log the traceback and report the failure to
        # the caller as text instead of letting the exception escape.
        logger.exception("Error handling tool call")
        return [types.TextContent(
            type="text",
            text=f"Error: {str(e)}",
            isError=True
        )]
async def main():
    """Serve the MCP app over the stdio transport until the run call returns."""
    async with stdio_server() as streams:
        # stdio_server yields a (read, write) stream pair.
        incoming, outgoing = streams
        init_options = app.create_initialization_options()
        await app.run(incoming, outgoing, init_options)
# Script entry point: only runs when the module is executed directly.
if __name__ == "__main__":
    # Configure root logging at INFO level before the server starts.
    logging.basicConfig(level=logging.INFO)
    # Run the async server loop to completion.
    asyncio.run(main())