"""
MCP server for Papers with Code - SOTA benchmarks, tasks, datasets, and paper discovery.
Provides tools for searching papers, browsing tasks/datasets, and finding state-of-the-art results.
"""
import asyncio
import json
from typing import List, Dict, Any, Optional
import httpx
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent
# =============================================================================
# Papers with Code Client
# =============================================================================
BASE_URL = "https://paperswithcode.com/api/v1"
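
# Trim long abstracts/descriptions before returning them to the caller; shared by
# search_papers, search_tasks, and search_methods.
def _truncate(text: Optional[str], limit: int) -> Optional[str]:
    """Return text shortened to `limit` characters, with an ellipsis if trimmed."""
    if text and len(text) > limit:
        return text[:limit] + "..."
    return text
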
class PWCClient:
"""Client for Papers with Code API."""
def __init__(self):
self.http = httpx.AsyncClient(
base_url=BASE_URL,
timeout=30.0,
follow_redirects=True,
)
async def _fetch_page(
self,
path: str,
params: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""Fetch a single page from the API."""
response = await self.http.get(path, params=params)
response.raise_for_status()
return response.json()
async def _fetch_all(
self,
path: str,
params: Optional[Dict[str, Any]] = None,
max_pages: int = 5,
) -> List[Dict[str, Any]]:
"""Fetch all results with pagination (up to max_pages)."""
results = []
url = f"{BASE_URL}{path}"
page_count = 0
while url and page_count < max_pages:
response = await self.http.get(url, params=params if page_count == 0 else None)
response.raise_for_status()
data = response.json()
results.extend(data.get("results", []))
url = data.get("next")
page_count += 1
return results
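
    # Note: the public methods below fetch a single page via _fetch_page; _fetch_all
    # is available for callers that want to aggregate several pages, e.g. (a sketch):
    #     results = await self._fetch_all("/papers/", {"q": "transformer"}, max_pages=3)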
# =========================================================================
# Papers
# =========================================================================
async def search_papers(
self,
query: Optional[str] = None,
arxiv_id: Optional[str] = None,
page: int = 1,
items_per_page: int = 20,
) -> Dict[str, Any]:
"""Search for papers."""
params = {"page": page, "items_per_page": items_per_page}
if query:
params["q"] = query
if arxiv_id:
params["arxiv_id"] = arxiv_id
data = await self._fetch_page("/papers/", params)
return {
"count": data.get("count", 0),
"results": [
{
"id": p.get("id"),
"arxiv_id": p.get("arxiv_id"),
"title": p.get("title"),
"abstract": p.get("abstract", "")[:500] + "..." if p.get("abstract") and len(p.get("abstract", "")) > 500 else p.get("abstract"),
"authors": p.get("authors", []),
"published": p.get("published"),
"conference": p.get("conference"),
"url_abs": p.get("url_abs"),
"url_pdf": p.get("url_pdf"),
"pwc_url": f"https://paperswithcode.com/paper/{p.get('id')}" if p.get("id") else None,
}
for p in data.get("results", [])
],
}
async def get_paper(self, paper_id: str) -> Dict[str, Any]:
"""Get details for a specific paper."""
data = await self._fetch_page(f"/papers/{paper_id}/")
return {
"id": data.get("id"),
"arxiv_id": data.get("arxiv_id"),
"title": data.get("title"),
"abstract": data.get("abstract"),
"authors": data.get("authors", []),
"published": data.get("published"),
"conference": data.get("conference"),
"proceeding": data.get("proceeding"),
"url_abs": data.get("url_abs"),
"url_pdf": data.get("url_pdf"),
"pwc_url": f"https://paperswithcode.com/paper/{data.get('id')}" if data.get("id") else None,
}
async def paper_repositories(self, paper_id: str, limit: int = 10) -> List[Dict[str, Any]]:
"""Get code repositories for a paper."""
data = await self._fetch_page(f"/papers/{paper_id}/repositories/", {"items_per_page": limit})
return [
{
"url": r.get("url"),
"owner": r.get("owner"),
"name": r.get("name"),
"description": r.get("description"),
"stars": r.get("stars"),
"framework": r.get("framework"),
"is_official": r.get("is_official"),
}
for r in data.get("results", [])
]
async def paper_benchmarks(self, paper_id: str, limit: int = 20) -> List[Dict[str, Any]]:
"""Get benchmark results for a paper."""
data = await self._fetch_page(f"/papers/{paper_id}/results/", {"items_per_page": limit})
return [
{
"id": r.get("id"),
"task": r.get("task"),
"dataset": r.get("dataset"),
"best_rank": r.get("best_rank"),
"metrics": r.get("metrics", {}),
"methodology": r.get("methodology"),
"uses_additional_data": r.get("uses_additional_data"),
}
for r in data.get("results", [])
]
# =========================================================================
# Tasks
# =========================================================================
async def search_tasks(
self,
query: Optional[str] = None,
page: int = 1,
items_per_page: int = 20,
) -> Dict[str, Any]:
"""Search for ML tasks."""
params = {"page": page, "items_per_page": items_per_page}
if query:
params["q"] = query
data = await self._fetch_page("/tasks/", params)
return {
"count": data.get("count", 0),
"results": [
{
"id": t.get("id"),
"name": t.get("name"),
"description": t.get("description", "")[:300] + "..." if t.get("description") and len(t.get("description", "")) > 300 else t.get("description"),
"pwc_url": f"https://paperswithcode.com/task/{t.get('id')}" if t.get("id") else None,
}
for t in data.get("results", [])
],
}
async def task_leaderboards(self, task_id: str, limit: int = 10) -> List[Dict[str, Any]]:
"""Get leaderboards (evaluation tables) for a task."""
data = await self._fetch_page(f"/tasks/{task_id}/evaluations/", {"items_per_page": limit})
return [
{
"id": e.get("id"),
"task": e.get("task"),
"dataset": e.get("dataset"),
"description": e.get("description"),
"pwc_url": f"https://paperswithcode.com/sota/{e.get('id')}" if e.get("id") else None,
}
for e in data.get("results", [])
]
# =========================================================================
# Datasets
# =========================================================================
async def search_datasets(
self,
query: Optional[str] = None,
page: int = 1,
items_per_page: int = 20,
) -> Dict[str, Any]:
"""Search for datasets."""
params = {"page": page, "items_per_page": items_per_page}
if query:
params["q"] = query
data = await self._fetch_page("/datasets/", params)
return {
"count": data.get("count", 0),
"results": [
{
"id": d.get("id"),
"name": d.get("name"),
"full_name": d.get("full_name"),
"url": d.get("url"),
"pwc_url": f"https://paperswithcode.com/dataset/{d.get('id')}" if d.get("id") else None,
}
for d in data.get("results", [])
],
}
async def dataset_leaderboards(self, dataset_id: str, limit: int = 10) -> List[Dict[str, Any]]:
"""Get leaderboards (evaluation tables) for a dataset."""
data = await self._fetch_page(f"/datasets/{dataset_id}/evaluations/", {"items_per_page": limit})
return [
{
"id": e.get("id"),
"task": e.get("task"),
"dataset": e.get("dataset"),
"description": e.get("description"),
"pwc_url": f"https://paperswithcode.com/sota/{e.get('id')}" if e.get("id") else None,
}
for e in data.get("results", [])
]
# =========================================================================
# SOTA / Leaderboards
# =========================================================================
async def get_sota(
self,
evaluation_id: str,
limit: int = 10,
) -> Dict[str, Any]:
"""Get SOTA results from a leaderboard."""
# Get metrics first to understand the leaderboard
metrics_data = await self._fetch_page(f"/evaluations/{evaluation_id}/metrics/", {"items_per_page": 50})
metrics = [
{
"id": m.get("id"),
"name": m.get("name"),
"description": m.get("description"),
"is_loss": m.get("is_loss", False),
}
for m in metrics_data.get("results", [])
]
# Get results
results_data = await self._fetch_page(f"/evaluations/{evaluation_id}/results/", {"items_per_page": limit})
results = results_data.get("results", [])
        # Prefer ranked entries: when any result has a best_rank, sort by it and
        # drop unranked entries so the list reflects the leaderboard ordering.
        ranked = [r for r in results if r.get("best_rank") is not None]
        if ranked:
            ranked.sort(key=lambda r: r["best_rank"])
            results = ranked
return {
"evaluation_id": evaluation_id,
"metrics": metrics,
"results": [
{
"rank": r.get("best_rank"),
"methodology": r.get("methodology"),
"paper": r.get("paper"),
"metrics": r.get("metrics", {}),
"uses_additional_data": r.get("uses_additional_data"),
"external_source_url": r.get("external_source_url"),
}
for r in results[:limit]
],
"pwc_url": f"https://paperswithcode.com/sota/{evaluation_id}",
}
# =========================================================================
# Methods
# =========================================================================
async def search_methods(
self,
query: Optional[str] = None,
page: int = 1,
items_per_page: int = 20,
) -> Dict[str, Any]:
"""Search for methods/architectures."""
params = {"page": page, "items_per_page": items_per_page}
if query:
params["q"] = query
data = await self._fetch_page("/methods/", params)
return {
"count": data.get("count", 0),
"results": [
{
"id": m.get("id"),
"name": m.get("name"),
"full_name": m.get("full_name"),
"description": m.get("description", "")[:300] + "..." if m.get("description") and len(m.get("description", "")) > 300 else m.get("description"),
"paper": m.get("paper"),
"pwc_url": f"https://paperswithcode.com/method/{m.get('id')}" if m.get("id") else None,
}
for m in data.get("results", [])
],
}
# =========================================================================
# Repositories
# =========================================================================
async def search_repos(
self,
query: Optional[str] = None,
framework: Optional[str] = None,
min_stars: Optional[int] = None,
page: int = 1,
items_per_page: int = 20,
) -> Dict[str, Any]:
"""Search for code repositories."""
params = {"page": page, "items_per_page": items_per_page}
if query:
params["q"] = query
if framework:
params["framework"] = framework
if min_stars:
params["stars"] = min_stars
data = await self._fetch_page("/repositories/", params)
return {
"count": data.get("count", 0),
"results": [
{
"url": r.get("url"),
"owner": r.get("owner"),
"name": r.get("name"),
"description": r.get("description"),
"stars": r.get("stars"),
"framework": r.get("framework"),
}
for r in data.get("results", [])
],
}
async def close(self):
"""Close HTTP client."""
await self.http.aclose()
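
# A minimal sketch of using PWCClient directly, outside the MCP server (assumes an
# asyncio event loop; the evaluation slug is the illustrative one from the tool
# descriptions below):
#
#     async def demo():
#         client = PWCClient()
#         try:
#             papers = await client.search_papers(query="diffusion model")
#             top5 = await client.get_sota("image-classification-on-imagenet", limit=5)
#         finally:
#             await client.close()
#
#     asyncio.run(demo())
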
# =============================================================================
# MCP Server
# =============================================================================
async def serve():
"""Run the MCP server."""
client = PWCClient()
server = Server("paperswithcode-mcp")
@server.list_tools()
async def list_tools() -> List[Tool]:
"""List available tools."""
return [
# Paper Tools
Tool(
name="pwc_search_papers",
description="""Search for papers on Papers with Code.
Examples:
- "transformer" - Find transformer papers
- "diffusion model" - Find diffusion model papers
- arxiv_id="2312.00752" - Find specific arXiv paper
Returns: titles, abstracts, authors, arXiv links, PWC links.""",
inputSchema={
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query for title/abstract",
},
"arxiv_id": {
"type": "string",
"description": "Filter by arXiv ID (e.g., '2312.00752')",
},
"page": {
"type": "integer",
"default": 1,
"description": "Page number",
},
"items_per_page": {
"type": "integer",
"default": 20,
"minimum": 1,
"maximum": 50,
},
},
},
),
Tool(
name="pwc_get_paper",
description="""Get detailed information about a specific paper.
Example: pwc_get_paper("attention-is-all-you-need")
Returns: full abstract, authors, conference, URLs.""",
inputSchema={
"type": "object",
"properties": {
"paper_id": {
"type": "string",
"description": "PWC paper ID (slug from URL, e.g., 'attention-is-all-you-need')",
},
},
"required": ["paper_id"],
},
),
Tool(
name="pwc_paper_benchmarks",
description="""Get benchmark results for a paper.
Shows which leaderboards/evaluation tables the paper appears on and its rankings.
Example: pwc_paper_benchmarks("attention-is-all-you-need")""",
inputSchema={
"type": "object",
"properties": {
"paper_id": {
"type": "string",
"description": "PWC paper ID",
},
"limit": {
"type": "integer",
"default": 20,
"maximum": 50,
},
},
"required": ["paper_id"],
},
),
# Task Tools
Tool(
name="pwc_search_tasks",
description="""Search for ML tasks on Papers with Code.
Examples:
- "image classification" - Find image classification tasks
- "question answering" - Find QA tasks
- "object detection" - Find detection tasks
Returns: task names, descriptions, PWC links.""",
inputSchema={
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query for task name",
},
"page": {
"type": "integer",
"default": 1,
},
"items_per_page": {
"type": "integer",
"default": 20,
"maximum": 50,
},
},
},
),
Tool(
name="pwc_task_leaderboards",
description="""Get leaderboards (evaluation tables) for a task.
Example: pwc_task_leaderboards("image-classification")
Returns: list of leaderboards with dataset names.""",
inputSchema={
"type": "object",
"properties": {
"task_id": {
"type": "string",
"description": "Task ID (slug, e.g., 'image-classification')",
},
"limit": {
"type": "integer",
"default": 10,
"maximum": 50,
},
},
"required": ["task_id"],
},
),
# Dataset Tools
Tool(
name="pwc_search_datasets",
description="""Search for datasets on Papers with Code.
Examples:
- "imagenet" - Find ImageNet dataset
- "coco" - Find COCO dataset
- "squad" - Find SQuAD dataset
Returns: dataset names and URLs.""",
inputSchema={
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query for dataset name",
},
"page": {
"type": "integer",
"default": 1,
},
"items_per_page": {
"type": "integer",
"default": 20,
"maximum": 50,
},
},
},
),
Tool(
name="pwc_dataset_leaderboards",
description="""Get leaderboards for a dataset.
Example: pwc_dataset_leaderboards("imagenet")
Returns: list of leaderboards/tasks evaluated on this dataset.""",
inputSchema={
"type": "object",
"properties": {
"dataset_id": {
"type": "string",
"description": "Dataset ID (slug, e.g., 'imagenet')",
},
"limit": {
"type": "integer",
"default": 10,
"maximum": 50,
},
},
"required": ["dataset_id"],
},
),
# SOTA Tool
Tool(
name="pwc_get_sota",
description="""Get SOTA (state-of-the-art) results from a leaderboard.
This is the key tool for finding current best results on any benchmark.
Example: pwc_get_sota("image-classification-on-imagenet")
Returns: ranked results with methodologies, metrics, and paper links.""",
inputSchema={
"type": "object",
"properties": {
"evaluation_id": {
"type": "string",
"description": "Evaluation table ID (e.g., 'image-classification-on-imagenet')",
},
"limit": {
"type": "integer",
"default": 10,
"maximum": 50,
"description": "Number of top results to return",
},
},
"required": ["evaluation_id"],
},
),
# Method Tools
Tool(
name="pwc_search_methods",
description="""Search for methods/architectures on Papers with Code.
Examples:
- "transformer" - Find transformer architecture
- "resnet" - Find ResNet architecture
- "attention" - Find attention mechanisms
Returns: method names, descriptions, and originating papers.""",
inputSchema={
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query for method name",
},
"page": {
"type": "integer",
"default": 1,
},
"items_per_page": {
"type": "integer",
"default": 20,
"maximum": 50,
},
},
},
),
# Repository Tools
Tool(
name="pwc_search_repos",
description="""Search for code repositories on Papers with Code.
Examples:
- "llama" - Find LLaMA implementations
- framework="pytorch" - Filter by PyTorch repos
- min_stars=1000 - Only repos with 1000+ stars
Returns: repo URLs, stars, frameworks.""",
inputSchema={
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query",
},
"framework": {
"type": "string",
"description": "Filter by framework (e.g., 'pytorch', 'tensorflow', 'jax')",
},
"min_stars": {
"type": "integer",
"description": "Minimum GitHub stars",
},
"page": {
"type": "integer",
"default": 1,
},
"items_per_page": {
"type": "integer",
"default": 20,
"maximum": 50,
},
},
},
),
]
@server.call_tool()
async def call_tool(name: str, arguments: dict) -> List[TextContent]:
"""Handle tool calls."""
try:
result: Any = None
# Paper Tools
if name == "pwc_search_papers":
result = await client.search_papers(
query=arguments.get("query"),
arxiv_id=arguments.get("arxiv_id"),
page=arguments.get("page", 1),
items_per_page=arguments.get("items_per_page", 20),
)
elif name == "pwc_get_paper":
result = await client.get_paper(arguments["paper_id"])
elif name == "pwc_paper_benchmarks":
result = await client.paper_benchmarks(
arguments["paper_id"],
limit=arguments.get("limit", 20),
)
# Task Tools
elif name == "pwc_search_tasks":
result = await client.search_tasks(
query=arguments.get("query"),
page=arguments.get("page", 1),
items_per_page=arguments.get("items_per_page", 20),
)
elif name == "pwc_task_leaderboards":
result = await client.task_leaderboards(
arguments["task_id"],
limit=arguments.get("limit", 10),
)
# Dataset Tools
elif name == "pwc_search_datasets":
result = await client.search_datasets(
query=arguments.get("query"),
page=arguments.get("page", 1),
items_per_page=arguments.get("items_per_page", 20),
)
elif name == "pwc_dataset_leaderboards":
result = await client.dataset_leaderboards(
arguments["dataset_id"],
limit=arguments.get("limit", 10),
)
# SOTA Tool
elif name == "pwc_get_sota":
result = await client.get_sota(
arguments["evaluation_id"],
limit=arguments.get("limit", 10),
)
# Method Tools
elif name == "pwc_search_methods":
result = await client.search_methods(
query=arguments.get("query"),
page=arguments.get("page", 1),
items_per_page=arguments.get("items_per_page", 20),
)
# Repository Tools
elif name == "pwc_search_repos":
result = await client.search_repos(
query=arguments.get("query"),
framework=arguments.get("framework"),
min_stars=arguments.get("min_stars"),
page=arguments.get("page", 1),
items_per_page=arguments.get("items_per_page", 20),
)
else:
return [TextContent(type="text", text=f"Unknown tool: {name}")]
# Format result
response_text = json.dumps(result, indent=2, default=str)
return [TextContent(type="text", text=response_text)]
except httpx.HTTPStatusError as e:
return [TextContent(type="text", text=f"API error: {e.response.status_code} - {e.response.text[:500]}")]
except Exception as e:
return [TextContent(type="text", text=f"Error: {type(e).__name__}: {e}")]
# Run server
try:
async with stdio_server() as (read_stream, write_stream):
await server.run(read_stream, write_stream, server.create_initialization_options())
finally:
await client.close()
def main():
"""Entry point."""
asyncio.run(serve())
if __name__ == "__main__":
main()