BioMCP

Overview Schema Related Servers Score Discussions

biomcp
src
sources

uniprot.rs•20.5 KiB

use std::borrow::Cow; use std::cmp::Ordering; use std::io::Read; use flate2::read::GzDecoder; use reqwest::header::ACCEPT; use serde::Deserialize; use serde::de::DeserializeOwned; use crate::error::BioMcpError; const UNIPROT_BASE: &str = "https://rest.uniprot.org"; const UNIPROT_API: &str = "uniprot"; const UNIPROT_BASE_ENV: &str = "BIOMCP_UNIPROT_BASE"; pub struct UniProtClient { client: reqwest::Client, base: Cow<'static, str>, } #[derive(Debug, Clone)] pub struct UniProtSearchPage { pub results: Vec<UniProtRecord>, pub total: Option<usize>, pub next_page_token: Option<String>, } impl UniProtClient { pub fn new() -> Result<Self, BioMcpError> { Ok(Self { client: crate::sources::streaming_http_client()?, base: crate::sources::env_base(UNIPROT_BASE, UNIPROT_BASE_ENV), }) } #[cfg(test)] fn new_for_test(base: String) -> Result<Self, BioMcpError> { Ok(Self { client: crate::sources::streaming_http_client()?, base: Cow::Owned(base), }) } fn endpoint(&self, path: &str) -> String { format!( "{}/{}", self.base.as_ref().trim_end_matches('/'), path.trim_start_matches('/') ) } async fn get_json<T, F>(&self, build_request: F) -> Result<T, BioMcpError> where T: DeserializeOwned, F: Fn() -> reqwest::RequestBuilder, { let resp = crate::sources::retry_send(UNIPROT_API, 3, || async { build_request().send().await }) .await?; let status = resp.status(); let bytes = crate::sources::read_limited_body(resp, UNIPROT_API).await?; let mut payload = bytes.to_vec(); if payload.starts_with(&[0x1f, 0x8b]) { let mut decoder = GzDecoder::new(payload.as_slice()); let mut decoded = Vec::new(); decoder .read_to_end(&mut decoded) .map_err(|err| BioMcpError::Api { api: UNIPROT_API.to_string(), message: format!("Failed to decode gzip response: {err}"), })?; payload = decoded; } if !status.is_success() { let excerpt = crate::sources::body_excerpt(&payload); return Err(BioMcpError::Api { api: UNIPROT_API.to_string(), message: format!("HTTP {status}: {excerpt}"), }); } 
serde_json::from_slice(&payload).map_err(|source| { let excerpt = crate::sources::body_excerpt(&payload); BioMcpError::Api { api: UNIPROT_API.to_string(), message: format!("Invalid JSON response: {excerpt} ({source})"), } }) } pub async fn get_record(&self, accession: &str) -> Result<UniProtRecord, BioMcpError> { let accession = accession.trim(); if accession.is_empty() { return Err(BioMcpError::InvalidArgument( "UniProt accession is required".into(), )); } let url = self.endpoint(&format!("uniprotkb/{accession}.json")); crate::sources::rate_limit::wait_for_url_str(&url).await; self.get_json(|| self.client.get(&url).header(ACCEPT, "application/json")) .await } pub async fn search( &self, query: &str, limit: usize, offset: usize, next_page: Option<&str>, ) -> Result<UniProtSearchPage, BioMcpError> { let query = query.trim(); if query.is_empty() { return Err(BioMcpError::InvalidArgument( "UniProt query is required".into(), )); } let url = self.endpoint("uniprotkb/search"); let size = limit.clamp(1, 25).to_string(); let offset = offset.to_string(); crate::sources::rate_limit::wait_for_url_str(&url).await; let token = normalize_next_page_token(next_page)?; let token_for_request = token.clone(); let resp = crate::sources::retry_send(UNIPROT_API, 3, || async { if let Some(token) = token_for_request.as_deref() { if token.starts_with("http://") || token.starts_with("https://") { return self.client.get(token).header(ACCEPT, "application/json").send().await; } return self .client .get(&url) .header(ACCEPT, "application/json") .query(&[ ("query", query), ("format", "json"), ("size", size.as_str()), ("cursor", token), ( "fields", "accession,id,protein_name,gene_names,organism_name,length,cc_function,xref_pdb,xref_alphafolddb", ), ]) .send() .await; } self.client .get(&url) .header(ACCEPT, "application/json") .query(&[ ("query", query), ("format", "json"), ("size", size.as_str()), ("offset", offset.as_str()), ( "fields", 
"accession,id,protein_name,gene_names,organism_name,length,cc_function,xref_pdb,xref_alphafolddb", ), ]) .send() .await }) .await?; let status = resp.status(); let total = resp .headers() .get("x-total-results") .and_then(|value| value.to_str().ok()) .and_then(|value| value.parse::<usize>().ok()); let next_page_token = parse_uniprot_next_link(resp.headers().get("link")); let bytes = crate::sources::read_limited_body(resp, UNIPROT_API).await?; let mut payload = bytes.to_vec(); if payload.starts_with(&[0x1f, 0x8b]) { let mut decoder = GzDecoder::new(payload.as_slice()); let mut decoded = Vec::new(); decoder .read_to_end(&mut decoded) .map_err(|err| BioMcpError::Api { api: UNIPROT_API.to_string(), message: format!("Failed to decode gzip response: {err}"), })?; payload = decoded; } if !status.is_success() { let excerpt = crate::sources::body_excerpt(&payload); return Err(BioMcpError::Api { api: UNIPROT_API.to_string(), message: format!("HTTP {status}: {excerpt}"), }); } let parsed: UniProtSearchResponse = serde_json::from_slice(&payload).map_err(|source| BioMcpError::Api { api: UNIPROT_API.to_string(), message: format!( "Invalid JSON response: {} ({source})", crate::sources::body_excerpt(&payload) ), })?; Ok(UniProtSearchPage { results: parsed.results, total, next_page_token, }) } } fn parse_uniprot_next_link(value: Option<&reqwest::header::HeaderValue>) -> Option<String> { let raw = value?.to_str().ok()?; for part in raw.split(',') { let piece = part.trim(); if !piece.contains("rel=\"next\"") { continue; } let start = piece.find('<')?; let end = piece[start + 1..].find('>')?; let url = piece[start + 1..start + 1 + end].trim(); if !url.is_empty() { return Some(url.to_string()); } } None } fn normalize_next_page_token(next_page: Option<&str>) -> Result<Option<String>, BioMcpError> { let Some(token) = next_page .map(str::trim) .filter(|value| !value.is_empty()) .map(str::to_string) else { return Ok(None); }; if token.len() > 2048 { return 
Err(BioMcpError::InvalidArgument( "--next-page token is too long".into(), )); } if token.chars().all(|ch| ch.is_ascii_digit()) { return Err(BioMcpError::InvalidArgument( "--next-page token is invalid. Use pagination.next_page_token from the previous result." .into(), )); } if token.chars().any(|ch| ch.is_whitespace()) { return Err(BioMcpError::InvalidArgument( "--next-page token must not contain whitespace".into(), )); } if token.starts_with("http://") || token.starts_with("https://") { let parsed = reqwest::Url::parse(&token).map_err(|_| { BioMcpError::InvalidArgument( "--next-page token URL is invalid. Use pagination.next_page_token from the previous result." .into(), ) })?; if parsed.host_str() != Some("rest.uniprot.org") { return Err(BioMcpError::InvalidArgument( "--next-page token must be a rest.uniprot.org URL. Use pagination.next_page_token from the previous result.".into(), )); } } Ok(Some(token)) } #[derive(Debug, Clone, Deserialize)] pub struct UniProtSearchResponse { #[serde(default)] pub results: Vec<UniProtRecord>, } #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "camelCase")] pub struct UniProtRecord { #[serde(default)] pub primary_accession: String, #[serde(rename = "uniProtkbId")] pub uni_prot_kb_id: Option<String>, pub protein_description: Option<UniProtProteinDescription>, #[serde(default)] pub genes: Vec<UniProtGene>, pub organism: Option<UniProtOrganism>, pub sequence: Option<UniProtSequence>, #[serde(default)] pub comments: Vec<UniProtComment>, #[serde(rename = "uniProtKBCrossReferences", default)] pub uni_prot_kb_cross_references: Vec<UniProtCrossReference>, } #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "camelCase")] pub struct UniProtProteinDescription { pub recommended_name: Option<UniProtNameContainer>, pub submission_names: Option<Vec<UniProtNameContainer>>, } #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "camelCase")] pub struct UniProtNameContainer { pub full_name: Option<UniProtTextValue>, } 
#[derive(Debug, Clone, Deserialize)] pub struct UniProtTextValue { pub value: String, } #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "camelCase")] pub struct UniProtGene { pub gene_name: Option<UniProtTextValue>, } #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "camelCase")] pub struct UniProtOrganism { pub scientific_name: Option<String>, } #[derive(Debug, Clone, Deserialize)] pub struct UniProtSequence { pub length: Option<u32>, } #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "camelCase")] pub struct UniProtComment { pub comment_type: Option<String>, #[serde(default)] pub texts: Vec<UniProtTextValue>, } #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "camelCase")] pub struct UniProtCrossReference { pub database: Option<String>, pub id: Option<String>, #[serde(default)] pub properties: Vec<UniProtCrossReferenceProperty>, } #[derive(Debug, Clone, Deserialize)] pub struct UniProtCrossReferenceProperty { pub key: Option<String>, pub value: Option<String>, } impl UniProtRecord { pub fn display_name(&self) -> String { if let Some(desc) = self.protein_description.as_ref() { if let Some(value) = desc .recommended_name .as_ref() .and_then(|v| v.full_name.as_ref()) .map(|v| v.value.trim()) .filter(|v| !v.is_empty()) { return value.to_string(); } if let Some(value) = desc .submission_names .as_ref() .and_then(|v| v.first()) .and_then(|v| v.full_name.as_ref()) .map(|v| v.value.trim()) .filter(|v| !v.is_empty()) { return value.to_string(); } } self.primary_accession.clone() } pub fn primary_gene_symbol(&self) -> Option<String> { self.genes .first() .and_then(|g| g.gene_name.as_ref()) .map(|g| g.value.trim().to_string()) .filter(|v| !v.is_empty()) } pub fn function_summary(&self) -> Option<String> { self.comments .iter() .find(|c| { c.comment_type .as_deref() .map(str::trim) .is_some_and(|v| v.eq_ignore_ascii_case("function")) }) .and_then(|c| c.texts.first()) .map(|v| v.value.trim().to_string()) .filter(|v| !v.is_empty()) } pub fn 
structure_ids(&self) -> Vec<String> { let mut out = Vec::new(); for x in &self.uni_prot_kb_cross_references { let Some(db) = x.database.as_deref().map(str::trim) else { continue; }; let Some(id) = x.id.as_deref().map(str::trim) else { continue; }; if id.is_empty() { continue; } if !matches!(db, "PDB" | "AlphaFoldDB") { continue; } if out.iter().any(|v: &String| v == id) { continue; } out.push(id.to_string()); } out } pub fn structure_count(&self) -> usize { self.structure_ids().len() } pub fn structure_summaries(&self, limit: usize) -> Vec<String> { #[derive(Debug)] struct PdbRow { id: String, method: Option<String>, resolution_text: Option<String>, resolution_value: Option<f64>, } let limit = limit.max(1); let mut seen: Vec<String> = Vec::new(); let mut pdb_rows: Vec<PdbRow> = Vec::new(); let mut other_rows: Vec<String> = Vec::new(); for x in &self.uni_prot_kb_cross_references { let Some(db) = x.database.as_deref().map(str::trim) else { continue; }; let Some(id) = x.id.as_deref().map(str::trim) else { continue; }; if id.is_empty() { continue; } if !matches!(db, "PDB" | "AlphaFoldDB") { continue; } if seen.iter().any(|v| v == id) { continue; } seen.push(id.to_string()); if db == "PDB" { let method = cross_ref_property(x, "Method"); let resolution_text = cross_ref_property(x, "Resolution") .map(|v| v.trim().to_string()) .filter(|v| !v.is_empty() && v != "-"); let resolution_value = resolution_text .as_deref() .and_then(parse_resolution_angstrom); pdb_rows.push(PdbRow { id: id.to_string(), method, resolution_text, resolution_value, }); } else { other_rows.push(format!("{id} (AlphaFold model)")); } } pdb_rows.sort_by(|a, b| match (a.resolution_value, b.resolution_value) { (Some(lhs), Some(rhs)) => lhs.partial_cmp(&rhs).unwrap_or(Ordering::Equal), (Some(_), None) => Ordering::Less, (None, Some(_)) => Ordering::Greater, (None, None) => a.id.cmp(&b.id), }); let mut out: Vec<String> = Vec::new(); for row in pdb_rows { let line = match (row.method.as_deref(), 
row.resolution_text.as_deref()) { (Some(method), Some(resolution)) => format!("{} ({method}, {resolution})", row.id), (Some(method), None) => format!("{} ({method})", row.id), (None, Some(resolution)) => format!("{} ({resolution})", row.id), (None, None) => row.id, }; out.push(line); if out.len() >= limit { return out; } } for row in other_rows { out.push(row); if out.len() >= limit { break; } } out } } fn cross_ref_property(row: &UniProtCrossReference, key: &str) -> Option<String> { row.properties.iter().find_map(|p| { let matches = p .key .as_deref() .map(str::trim) .is_some_and(|k| k.eq_ignore_ascii_case(key)); if !matches { return None; } p.value .as_deref() .map(str::trim) .filter(|v| !v.is_empty()) .map(str::to_string) }) } fn parse_resolution_angstrom(value: &str) -> Option<f64> { let token = value .trim() .trim_end_matches('A') .trim_end_matches('a') .trim(); let token = token.split_whitespace().next()?; token.parse::<f64>().ok() } #[cfg(test)] mod tests { use super::*; use wiremock::matchers::{method, path, query_param}; use wiremock::{Mock, MockServer, ResponseTemplate}; #[tokio::test] async fn search_sets_expected_query_params() { let server = MockServer::start().await; Mock::given(method("GET")) .and(path("/uniprotkb/search")) .and(query_param("query", "BRAF")) .and(query_param("format", "json")) .and(query_param("size", "3")) .and(query_param("offset", "0")) .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ "results": [{ "primaryAccession": "P15056", "uniProtkbId": "BRAF_HUMAN", "proteinDescription": { "recommendedName": {"fullName": {"value": "Serine/threonine-protein kinase B-raf"}} }, "genes": [{"geneName": {"value": "BRAF"}}] }] }))) .mount(&server) .await; let client = UniProtClient::new_for_test(server.uri()).unwrap(); let page = client.search("BRAF", 3, 0, None).await.unwrap(); assert_eq!(page.results.len(), 1); assert_eq!(page.results[0].primary_accession, "P15056"); assert_eq!( 
page.results[0].primary_gene_symbol().as_deref(), Some("BRAF") ); } #[test] fn record_helpers_extract_display_function_and_structures() { let record: UniProtRecord = serde_json::from_value(serde_json::json!({ "primaryAccession": "P15056", "proteinDescription": { "recommendedName": {"fullName": {"value": " Kinase X "}} }, "comments": [ {"commentType": "FUNCTION", "texts": [{"value": " Signal transduction. "}]} ], "uniProtKBCrossReferences": [ { "database": "PDB", "id": "1UWH", "properties": [ {"key": "Method", "value": "X-ray"}, {"key": "Resolution", "value": "2.95 A"} ] }, {"database": "PDB", "id": "1UWH"}, {"database": "AlphaFoldDB", "id": "AF-P15056-F1"}, {"database": "GO", "id": "GO:0004672"} ] })) .unwrap(); assert_eq!(record.display_name(), "Kinase X"); assert_eq!( record.function_summary().as_deref(), Some("Signal transduction.") ); assert_eq!( record.structure_ids(), vec!["1UWH".to_string(), "AF-P15056-F1".to_string()] ); assert_eq!(record.structure_count(), 2); assert_eq!( record.structure_summaries(10), vec![ "1UWH (X-ray, 2.95 A)".to_string(), "AF-P15056-F1 (AlphaFold model)".to_string() ] ); } #[test] fn normalize_next_page_token_rejects_numeric_only_tokens() { let err = normalize_next_page_token(Some("12345")).expect_err("numeric token should fail"); assert!(err.to_string().contains("--next-page token is invalid")); } #[test] fn normalize_next_page_token_accepts_cursor_url() { let token = normalize_next_page_token(Some("https://rest.uniprot.org/uniprotkb/search?cursor=abc")) .expect("valid URL token"); assert!(token.is_some()); } }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/genomoncology/biomcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

uniprot.rs•20.5 KiB