get_protein_info
Retrieve protein function and sequence details from UniProt by entering an accession number.
Instructions
Get protein function and sequence information from UniProt using an accession No.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| accession | Yes | UniProt Accession No. (e.g., P12345) | |
Implementation Reference
- src/uniprot_mcp_server/server.py:112-166 (handler) Core handler function that fetches protein information from the UniProt API, extracts relevant fields (name, function, sequence, length, organism), implements caching, and returns a ProteinInfo dict.
async def fetch_protein_info(accession: str) -> ProteinInfo:
    """Fetch protein information from UniProt API with caching.

    The cache is consulted first; on a miss the entry is requested as JSON
    from ``{API_BASE_URL}/{accession}``, reduced to the ``ProteinInfo``
    fields, stored in the cache, and returned.

    Args:
        accession: UniProt accession number (e.g. ``P12345``).

    Returns:
        A ``ProteinInfo`` dict. Fields missing from the API response fall
        back to ``"Unknown"`` (name, organism), ``""`` (sequence), ``0``
        (length) or an empty list (function).

    Raises:
        httpx.HTTPStatusError: Raised by ``response.raise_for_status()`` when
            the API answers with an error status.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import quote

    # NOTE(review): `self` is not a parameter of this function; presumably it
    # is defined inside a method and closes over `self` -- confirm.
    cached_data = self.cache.get(accession)
    if cached_data is not None:
        logger.info("Cache hit for %s", accession)
        return cached_data

    logger.info("Fetching data for %s", accession)
    # Percent-encode the accession so characters such as "/", "?", "#" or ".."
    # in caller-supplied input cannot change the request path or query.
    # Well-formed (alphanumeric) accessions are left untouched by quote().
    url = f"{API_BASE_URL}/{quote(accession, safe='')}"
    async with httpx.AsyncClient() as client:
        response = await client.get(url, headers={"Accept": "application/json"})
        response.raise_for_status()
        data = response.json()

    protein_name = (
        data.get("proteinDescription", {})
        .get("recommendedName", {})
        .get("fullName", {})
        .get("value", "Unknown")
    )

    # Collect the text of every FUNCTION comment, in response order.
    functions: list[str] = []
    for comment in data.get("comments", []):
        if comment.get("commentType") != "FUNCTION":
            continue
        functions.extend(
            text.get("value", "") for text in comment.get("texts", [])
        )

    seq_info = data.get("sequence", {})
    org_info = data.get("organism", {})

    protein_info: ProteinInfo = {
        "accession": accession,
        "protein_name": protein_name,
        "function": functions,
        "sequence": seq_info.get("value", ""),
        "length": seq_info.get("length", 0),
        "organism": org_info.get("scientificName", "Unknown"),
    }

    # Cache under the accession exactly as given by the caller.
    self.cache.set(accession, protein_info)
    return protein_info
# - src/uniprot_mcp_server/server.py:173-183 (handler) Dispatch logic within the call_tool handler specifically for 'get_protein_info': validates input, calls fetch_protein_info, and formats response as JSON TextContent.
if name == "get_protein_info": accession = arguments.get("accession") if not accession: raise ValueError("Accession No. is required") protein_info = await fetch_protein_info(accession) return [ TextContent( type="text", text=json.dumps(protein_info, indent=2) ) ] - src/uniprot_mcp_server/server.py:79-94 (registration)Tool registration in list_tools(): defines name, description, and inputSchema for 'get_protein_info'.
name="get_protein_info", description=( "Get protein function and sequence information from UniProt " "using an accession No." ), inputSchema={ "type": "object", "properties": { "accession": { "type": "string", "description": "UniProt Accession No. (e.g., P12345)", } }, "required": ["accession"], }, ), - ProteinInfo TypedDict: schema for the output structure of protein information.
class ProteinInfo(TypedDict):
    """Type definition for protein information.

    Describes the dict that fetch_protein_info builds from a UniProt API
    response and returns to its caller.
    """

    # Accession number exactly as passed to fetch_protein_info.
    accession: str
    # proteinDescription.recommendedName.fullName.value, or "Unknown".
    protein_name: str
    # Text values of every comment whose commentType is "FUNCTION".
    function: list[str]
    # sequence.value from the response, or "" when absent.
    sequence: str
    # sequence.length from the response, or 0 when absent.
    length: int
    # organism.scientificName from the response, or "Unknown".
    organism: str
# - Cache class: helper utility used by fetch_protein_info for TTL-based caching of API results.
class Cache:
    """Simple cache implementation with TTL and max size limit.

    Entries live in an ``OrderedDict`` ordered from oldest write to newest
    write. When a new key would exceed ``max_size``, the oldest entries are
    evicted. Expired entries are dropped lazily, when they are next read.
    """

    def __init__(self, max_size: int = 100, ttl_hours: int = 24) -> None:
        """Initialize cache with size and TTL limits.

        Args:
            max_size: Maximum number of entries kept at once. A value of
                zero or less disables storage entirely.
            ttl_hours: Age in hours after which an entry counts as expired.
        """
        self.cache: OrderedDict[str, Tuple[Any, datetime]] = OrderedDict()
        self.max_size = max_size
        self.ttl = timedelta(hours=ttl_hours)

    def get(self, key: str) -> Optional[Any]:
        """Get a value from cache if it exists and hasn't expired.

        Args:
            key: Cache key to look up.

        Returns:
            The stored value, or ``None`` when the key is absent or its entry
            is older than the TTL (an expired entry is removed on the way).
        """
        entry = self.cache.get(key)
        if entry is None:
            return None

        item, timestamp = entry
        if datetime.now() - timestamp > self.ttl:
            del self.cache[key]
            return None
        return item

    def set(self, key: str, value: Any) -> None:
        """Set a value in cache with current timestamp.

        Overwriting an existing key refreshes its timestamp and makes it the
        newest entry; it never evicts another entry, because the size does
        not grow.

        Args:
            key: Cache key to store under.
            value: Value to store.
        """
        if self.max_size <= 0:
            # Nothing can be held; evicting from an empty dict would raise.
            return

        # Drop any previous entry first so a rewrite neither counts toward
        # the size limit nor keeps its old (early) eviction position.
        self.cache.pop(key, None)
        while len(self.cache) >= self.max_size:
            self.cache.popitem(last=False)
        self.cache[key] = (value, datetime.now())