UniProt MCP Server
by TakumiY235
- src
- uniprot_mcp_server
"""UniProt MCP Server implementation."""
import json
import logging
from collections import OrderedDict
from datetime import datetime, timedelta
from typing import Any, Optional, Sequence, Tuple, TypedDict
from urllib.parse import quote

import httpx
from mcp.server import Server
from mcp.types import TextContent, Tool
# Configure logging: root logging is set to INFO as a side effect of importing
# this module.
logging.basicConfig(level=logging.INFO)
# Logger used by this module for cache-hit, fetch, and error messages.
logger = logging.getLogger("uniprot-server")
# API configuration: base URL of the UniProtKB REST endpoint; an accession is
# appended as the final path segment to request a single entry.
API_BASE_URL = "https://rest.uniprot.org/uniprotkb"
class ProteinInfo(TypedDict):
    """Shape of the protein record produced for each requested accession."""

    # Accession the record was requested with.
    accession: str
    # Full protein name, or "Unknown" when the entry provides none.
    protein_name: str
    # Text values collected from the entry's FUNCTION comments (may be empty).
    function: list[str]
    # Sequence string taken from the entry ("" when absent).
    sequence: str
    # Sequence length as reported by the entry (0 when absent).
    length: int
    # Scientific name of the source organism, or "Unknown".
    organism: str
class Cache:
    """Bounded in-memory cache whose entries expire after a fixed TTL.

    Entries are held in insertion order. When the cache is full, the oldest
    entry is evicted to make room for a new key. Setting a key that is already
    present refreshes its value and timestamp and makes it the newest entry;
    it never evicts a different key.
    """

    def __init__(self, max_size: int = 100, ttl_hours: int = 24) -> None:
        """Initialize cache with size and TTL limits.

        Args:
            max_size: Maximum number of entries held at once. Zero or a
                negative value disables storage (``set`` becomes a no-op).
            ttl_hours: Age in hours after which an entry is treated as expired.
        """
        self.cache: OrderedDict[str, Tuple[Any, datetime]] = OrderedDict()
        self.max_size = max_size
        self.ttl = timedelta(hours=ttl_hours)

    def get(self, key: str) -> Optional[Any]:
        """Return the cached value for *key*, or ``None`` if missing or expired.

        An expired entry is removed from the cache as a side effect.
        """
        entry = self.cache.get(key)
        if entry is None:
            return None
        item, timestamp = entry
        if datetime.now() - timestamp > self.ttl:
            del self.cache[key]
            return None
        return item

    def set(self, key: str, value: Any) -> None:
        """Store *value* under *key*, stamped with the current time.

        Evicts the oldest entries only when a genuinely new key would push the
        cache past ``max_size``.
        """
        if self.max_size <= 0:
            # Nothing can be stored; avoids popping from an empty dict.
            return
        # Remove any previous entry first so an overwrite neither counts
        # against the size limit nor keeps its stale queue position.
        self.cache.pop(key, None)
        while len(self.cache) >= self.max_size:
            self.cache.popitem(last=False)
        self.cache[key] = (value, datetime.now())
class UniProtServer:
    """MCP server for UniProt protein data access.

    Registers two tools: ``get_protein_info`` (single accession) and
    ``get_batch_protein_info`` (list of accessions). Fetched records are kept
    in an in-memory ``Cache`` keyed by accession.
    """

    def __init__(self) -> None:
        """Initialize the server with cache and handlers."""
        self.server = Server("uniprot-server")
        self.cache = Cache()
        self.setup_handlers()

    def setup_handlers(self) -> None:
        """Set up MCP protocol handlers."""
        self.setup_tool_handlers()

    @staticmethod
    def _normalize_accession(raw: Any) -> str:
        """Validate one accession argument and return it stripped.

        Raises:
            ValueError: If the value is missing, empty, or not a string.
        """
        if raw is None or raw == "":
            raise ValueError("Accession No. is required")
        if not isinstance(raw, str) or not raw.strip():
            raise ValueError("Accession No. must be a non-empty string")
        return raw.strip()

    @staticmethod
    def _coerce_accession_list(raw: Any) -> list[Any]:
        """Return the ``accessions`` argument as a list of raw items.

        A bare string is treated as a single accession; iterating it directly
        would yield one lookup per character.

        Raises:
            ValueError: If no accessions were given or the value is not a list.
        """
        if isinstance(raw, str):
            raw = [raw]
        if not raw:
            raise ValueError("At least one accession No. is required")
        if not isinstance(raw, (list, tuple)):
            raise ValueError("accessions must be a list of accession No.")
        return list(raw)

    @staticmethod
    def _extract_protein_info(accession: str, data: dict[str, Any]) -> "ProteinInfo":
        """Build a ``ProteinInfo`` record from a UniProtKB JSON entry.

        Missing sections fall back to defaults ("Unknown", "", 0, []).
        """
        description = data.get("proteinDescription") or {}
        recommended = description.get("recommendedName") or {}
        protein_name = (recommended.get("fullName") or {}).get("value")
        if not protein_name:
            # Unreviewed entries carry no recommendedName; their name is
            # listed under submissionNames instead.
            for submitted in description.get("submissionNames") or []:
                protein_name = (submitted.get("fullName") or {}).get("value")
                if protein_name:
                    break

        functions = [
            text.get("value", "")
            for comment in data.get("comments") or []
            if comment.get("commentType") == "FUNCTION"
            for text in comment.get("texts") or []
        ]
        sequence = data.get("sequence") or {}
        organism = data.get("organism") or {}
        return {
            "accession": accession,
            "protein_name": protein_name or "Unknown",
            "function": functions,
            "sequence": sequence.get("value", ""),
            "length": sequence.get("length", 0),
            "organism": organism.get("scientificName", "Unknown"),
        }

    async def _fetch_protein_info(
        self, accession: str, client: "Optional[httpx.AsyncClient]" = None
    ) -> "ProteinInfo":
        """Fetch protein information from UniProt API with caching.

        Args:
            accession: Validated accession to look up.
            client: Optional shared HTTP client; when omitted a temporary one
                is opened for this request only.

        Raises:
            httpx.HTTPError: On transport failures or non-success responses.
            ValueError: If the response body is not valid JSON.
        """
        cached = self.cache.get(accession)
        if cached is not None:
            logger.info("Cache hit for %s", accession)
            return cached

        logger.info("Fetching data for %s", accession)
        # Percent-encode the accession so caller-supplied text cannot add
        # path segments or a query string to the request URL.
        url = f"{API_BASE_URL}/{quote(accession, safe='')}"
        headers = {"Accept": "application/json"}
        if client is None:
            async with httpx.AsyncClient() as own_client:
                response = await own_client.get(url, headers=headers)
        else:
            response = await client.get(url, headers=headers)
        response.raise_for_status()

        protein_info = self._extract_protein_info(accession, response.json())
        self.cache.set(accession, protein_info)
        return protein_info

    async def _fetch_batch(self, raw_accessions: Any) -> list[dict[str, Any]]:
        """Fetch several accessions, reporting failures per item.

        Each result is either a protein record or a dict with ``accession``
        and ``error`` keys, so one bad item does not abort the others.
        """
        items = self._coerce_accession_list(raw_accessions)
        results: list[dict[str, Any]] = []
        # One client for the whole batch so connections are reused.
        async with httpx.AsyncClient() as client:
            for raw in items:
                try:
                    accession = self._normalize_accession(raw)
                    results.append(await self._fetch_protein_info(accession, client))
                except (httpx.HTTPError, ValueError) as exc:
                    # ValueError covers invalid accessions and undecodable
                    # JSON bodies as well as HTTP-level failures.
                    results.append(
                        {
                            "accession": raw,
                            "error": f"Failed to fetch data: {exc}",
                        }
                    )
        return results

    def setup_tool_handlers(self) -> None:
        """Configure tool-related request handlers."""

        @self.server.list_tools()
        async def list_tools() -> list[Tool]:
            """List available UniProt tools."""
            return [
                Tool(
                    name="get_protein_info",
                    description=(
                        "Get protein function and sequence information from UniProt "
                        "using an accession No."
                    ),
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "accession": {
                                "type": "string",
                                "description": "UniProt Accession No. (e.g., P12345)",
                            }
                        },
                        "required": ["accession"],
                    },
                ),
                Tool(
                    name="get_batch_protein_info",
                    description="Get protein information for multiple accession No.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "accessions": {
                                "type": "array",
                                "items": {"type": "string"},
                                "description": "List of UniProt accession No.",
                            }
                        },
                        "required": ["accessions"],
                    },
                ),
            ]

        @self.server.call_tool()
        async def call_tool(
            name: str, arguments: dict[str, Any]
        ) -> Sequence[TextContent]:
            """Handle tool execution requests.

            Failures are reported to the client as text content rather than
            raised, matching the tool's existing contract.
            """
            arguments = arguments or {}
            try:
                if name == "get_protein_info":
                    accession = self._normalize_accession(arguments.get("accession"))
                    payload: Any = await self._fetch_protein_info(accession)
                elif name == "get_batch_protein_info":
                    payload = await self._fetch_batch(arguments.get("accessions"))
                else:
                    raise ValueError(f"Unknown tool: {name}")
                return [TextContent(type="text", text=json.dumps(payload, indent=2))]
            except httpx.HTTPError as e:
                logger.error("UniProt API error: %s", e)
                return [
                    TextContent(
                        type="text",
                        text=f"Error fetching protein information: {str(e)}",
                    )
                ]
            except Exception as e:
                # Top-level boundary: log with traceback, answer with text.
                logger.exception("Unexpected error: %s", e)
                return [
                    TextContent(
                        type="text",
                        text=f"An unexpected error occurred: {str(e)}",
                    )
                ]

    async def run(self) -> None:
        """Start the server using stdio transport."""
        from mcp.server.stdio import stdio_server

        async with stdio_server() as (read_stream, write_stream):
            await self.server.run(
                read_stream,
                write_stream,
                self.server.create_initialization_options(),
            )
async def main() -> None:
    """Build a UniProtServer and serve until its run loop returns."""
    await UniProtServer().run()
if __name__ == "__main__":
    # Script entry point: run main() to completion on a new event loop.
    import asyncio

    asyncio.run(main())