mcp_server.py•46.3 kB
"""
Noctua MCP Server
=================
Thin MCP wrapper around gocam-ai library for GO-CAM model manipulation.
This server exposes GO-CAM editing capabilities through the Model Context Protocol.
Security & credentials
----------------------
- Set BARISTA_TOKEN in environment before launch
- The token is used by the underlying gocam-ai BaristaClient
Transport
---------
- Runs via stdio transport by default
- Launch with: uvx noctua-mcp
"""
from typing import Any, Dict, List, Optional
from fastmcp import FastMCP
from noctua import BaristaClient
from noctua.amigo import AmigoClient
mcp = FastMCP(
"noctua-mcp",
instructions="""
Noctua MCP Server provides tools for editing GO-CAM models via the Barista API.
Use these tools to create and edit GO-CAM models with individuals, facts, and evidence.
Available operations:
- Create new empty GO-CAM models
- Add individuals (instances) of GO/ECO terms
- Add facts (edges) between individuals
- Add evidence to facts
- Remove individuals and facts
- Query model structure
- Create common GO-CAM patterns (e.g., basic pathway units)
"""
)
# Create a module-level client instance (will be lazily initialized)
_client: Optional[BaristaClient] = None
def get_client() -> BaristaClient:
"""Get or create the Barista client instance."""
global _client
if _client is None:
_client = BaristaClient()
return _client
@mcp.tool()
async def configure_token(token: str) -> Dict[str, Any]:
"""
Configure the Barista authentication token.
Args:
token: The Barista authentication token
Returns:
Success status
"""
import os
global _client
# Set environment variable
os.environ["BARISTA_TOKEN"] = token
# Reset client to pick up new token
_client = None
return {
"success": True,
"configured": True
}
@mcp.tool()
async def create_model(
title: Optional[str] = None
) -> Dict[str, Any]:
"""
Create a new empty GO-CAM model.
Args:
title: Optional title for the model
Returns:
Barista API response containing the new model ID and editor URLs
Examples:
# Create a new model with a title
response = create_model("RAS-RAF signaling pathway")
model_id = response["data"]["id"]
print(f"Graph editor: {response['graph_editor_url']}")
print(f"Pathway editor: {response['pathway_editor_url']}")
# Create a model with a descriptive title
response = create_model("Human Wnt signaling pathway")
# Create an unnamed model
response = create_model()
# Extract the model ID from response
if response["message-type"] == "success":
model_id = response["data"]["id"]
print(f"Created model: {model_id}")
Notes:
- The returned model_id can be used with other tools like add_individual
- Models are created in "development" state by default
- To add taxon information, use add_individual after creating the model
"""
client = get_client()
resp = client.create_model(title=title)
if resp.validation_failed:
return {
"success": False,
"error": "Validation failed",
"reason": resp.validation_reason,
"rolled_back": True
}
if resp.error:
return {
"success": False,
"error": "Operation failed",
"reason": resp.error
}
# Build minimal response
result = {
"success": True,
"model_id": resp.model_id,
"created": True
}
# Add editor URLs if we have a model ID
if resp.model_id:
import os
token = os.environ.get("BARISTA_TOKEN", "")
# Graph editor with token
result["graph_editor_url"] = f"http://noctua-dev.berkeleybop.org/editor/graph/{resp.model_id}?barista_token={token}"
# Pathway editor without token (URL encoded model ID)
from urllib.parse import quote
encoded_id = quote(resp.model_id, safe="")
result["pathway_editor_url"] = f"http://noctua.geneontology.org/workbench/noctua-visual-pathway-editor/?model_id={encoded_id}"
return result
@mcp.tool()
async def add_individual(
model_id: str,
class_curie: str,
class_label: str,
assign_var: str = "x1"
) -> Dict[str, Any]:
"""
Add an individual (instance) of a class to a GO-CAM model with label validation.
This tool requires providing the expected label for the class to prevent
accidental use of wrong IDs (e.g., GO:0003924 vs GO:0003925). The operation
will automatically rollback if the created individual doesn't match the
expected label.
Args:
model_id: The GO-CAM model identifier (e.g., "gomodel:12345")
class_curie: The class to instantiate (e.g., "GO:0003674")
class_label: The expected rdfs:label of the class (e.g., "molecular_function")
assign_var: Variable name for referencing in the same batch
Returns:
Barista API response with message-type and signal fields.
If validation fails, includes rolled_back=true and validation error.
Examples:
# Add a molecular function activity with validation
add_individual("gomodel:12345", "GO:0004672", "protein kinase activity", "mf1")
# Add a protein/gene product with validation
add_individual("gomodel:12345", "UniProtKB:P38398", "BRCA1", "gp1")
# Add a cellular component with validation
add_individual("gomodel:12345", "GO:0005737", "cytoplasm", "cc1")
# Add a biological process with validation
add_individual("gomodel:12345", "GO:0016055", "Wnt signaling pathway", "bp1")
# Add an evidence instance with validation
add_individual("gomodel:12345", "ECO:0000353", "physical interaction evidence", "ev1")
# Variables like "mf1", "gp1" can be referenced in subsequent
# add_fact calls within the same batch operation
Notes:
- The label acts as a checksum to prevent ID hallucination
- If the label doesn't match, the operation is automatically rolled back
- This prevents corrupt models from incorrect IDs
"""
client = get_client()
expected_type = {"id": class_curie, "label": class_label}
resp = client.add_individual_validated(model_id, class_curie, expected_type, assign_var)
if resp.validation_failed:
return {
"success": False,
"error": "Validation failed",
"reason": resp.validation_reason,
"rolled_back": True,
"expected_label": class_label,
"class_curie": class_curie
}
if resp.error:
return {
"success": False,
"error": resp.error,
"model_id": model_id,
"class_curie": class_curie
}
# Return minimal success response
return {
"success": True,
"individual_id": resp.individual_id if hasattr(resp, 'individual_id') else assign_var,
"class_curie": class_curie,
"assign_var": assign_var
}
@mcp.tool()
async def add_fact(
model_id: str,
subject_id: str,
object_id: str,
predicate_id: str
) -> Dict[str, Any]:
"""
Add a fact (edge/relation) between two individuals in a model.
Args:
model_id: The GO-CAM model identifier
subject_id: Subject individual ID or variable
object_id: Object individual ID or variable
predicate_id: Relation predicate (e.g., "RO:0002333" for enabled_by)
Returns:
Barista API response
Examples:
# Connect molecular function to gene product (enabled_by)
add_fact("gomodel:12345", "mf1", "gp1", "RO:0002333")
# Connect molecular function to cellular component (occurs_in)
add_fact("gomodel:12345", "mf1", "cc1", "BFO:0000066")
# Connect molecular function to biological process (part_of)
add_fact("gomodel:12345", "mf1", "bp1", "BFO:0000050")
# Add causal relationship between activities
add_fact("gomodel:12345", "mf1", "mf2", "RO:0002411") # causally upstream of
add_fact("gomodel:12345", "mf1", "mf2", "RO:0002629") # directly positively regulates
add_fact("gomodel:12345", "mf1", "mf2", "RO:0002630") # directly negatively regulates
add_fact("gomodel:12345", "mf1", "mf2", "RO:0002413") # provides input for
# Add regulates relationships
add_fact("gomodel:12345", "mf1", "bp1", "RO:0002211") # regulates
add_fact("gomodel:12345", "mf1", "bp1", "RO:0002213") # positively regulates
add_fact("gomodel:12345", "mf1", "bp1", "RO:0002212") # negatively regulates
# Add indirect regulation relationships
add_fact("gomodel:12345", "mf1", "mf2", "RO:0002407") # indirectly positively regulates
add_fact("gomodel:12345", "mf1", "mf2", "RO:0002409") # indirectly negatively regulates
# Add causal relationships with effects
add_fact("gomodel:12345", "mf1", "mf2", "RO:0002304") # causally upstream of, positive effect
add_fact("gomodel:12345", "mf1", "mf2", "RO:0002305") # causally upstream of, negative effect
# Add small molecule regulation relationships
add_fact("gomodel:12345", "sm1", "mf1", "RO:0012005") # is small molecule activator of
add_fact("gomodel:12345", "sm1", "mf1", "RO:0012006") # is small molecule inhibitor of
# Use with existing individual IDs from model
add_fact("gomodel:12345", "gomodel:12345/abc123", "gomodel:12345/def456", "RO:0002333")
"""
client = get_client()
req = client.req_add_fact(model_id, subject_id, object_id, predicate_id)
resp = client.m3_batch([req])
if resp.validation_failed:
return {
"success": False,
"error": "Validation failed",
"reason": resp.validation_reason,
"rolled_back": True,
"fact": {
"subject": subject_id,
"predicate": predicate_id,
"object": object_id
}
}
if resp.error:
return {
"success": False,
"error": resp.error,
"model_id": model_id,
"fact": {
"subject": subject_id,
"predicate": predicate_id,
"object": object_id
}
}
# Return minimal success response
return {
"success": True,
"fact_added": True
}
@mcp.tool()
async def add_evidence_to_fact(
model_id: str,
subject_id: str,
object_id: str,
predicate_id: str,
eco_id: str,
sources: List[str],
with_from: Optional[List[str]] = None
) -> Dict[str, Any]:
"""
Add evidence to an existing fact in a GO-CAM model.
Args:
model_id: The GO-CAM model identifier
subject_id: Subject of the fact
object_id: Object of the fact
predicate_id: Predicate of the fact
eco_id: Evidence code (e.g., "ECO:0000353")
sources: List of source references (e.g., ["PMID:12345"])
with_from: Optional list of with/from references
Returns:
Barista API response
Examples:
# Add experimental evidence from a paper
add_evidence_to_fact(
"gomodel:12345", "mf1", "gp1", "RO:0002333",
"ECO:0000353", # physical interaction evidence
["PMID:12345678"]
)
# Add multiple sources
add_evidence_to_fact(
"gomodel:12345", "mf1", "gp1", "RO:0002333",
"ECO:0000314", # direct assay evidence
["PMID:12345678", "PMID:87654321", "doi:10.1234/example"]
)
# Add evidence with with/from (e.g., for IPI)
add_evidence_to_fact(
"gomodel:12345", "mf1", "gp1", "RO:0002333",
"ECO:0000353", # IPI
["PMID:12345678"],
["UniProtKB:Q9Y6K9", "UniProtKB:P38398"] # interacting partners
)
# Common evidence codes:
# ECO:0000314 - direct assay evidence
# ECO:0000353 - physical interaction evidence (IPI)
# ECO:0000315 - mutant phenotype evidence (IMP)
# ECO:0000316 - genetic interaction evidence (IGI)
# ECO:0000318 - biological aspect of ancestor evidence (IBA)
# ECO:0000269 - experimental evidence
"""
client = get_client()
reqs = client.req_add_evidence_to_fact(
model_id, subject_id, object_id, predicate_id,
eco_id, sources, with_from
)
resp = client.m3_batch(reqs)
if resp.validation_failed:
return {
"success": False,
"error": "Validation failed",
"reason": resp.validation_reason,
"rolled_back": True,
"fact": {
"subject": subject_id,
"predicate": predicate_id,
"object": object_id
},
"evidence": {
"eco_id": eco_id,
"sources": sources,
"with_from": with_from
}
}
if resp.error:
return {
"success": False,
"error": resp.error,
"model_id": model_id,
"fact": {
"subject": subject_id,
"predicate": predicate_id,
"object": object_id
}
}
# Return minimal success response
return {
"success": True,
"evidence_added": True,
"eco_id": eco_id
}
@mcp.tool()
async def remove_individual(
model_id: str,
individual_id: str
) -> Dict[str, Any]:
"""
Remove an individual from a GO-CAM model.
Note: This will also remove all facts (edges) connected to this individual.
Args:
model_id: The GO-CAM model identifier
individual_id: The individual to remove
Returns:
Barista API response
Examples:
# Remove using a variable reference (within same batch)
remove_individual("gomodel:12345", "mf1")
# Remove using full individual ID
remove_individual("gomodel:12345", "gomodel:12345/5fce9b7300001215")
# Remove an evidence individual
remove_individual("gomodel:12345", "gomodel:12345/evidence_123")
# Clean up after testing
for ind_id in ["test1", "test2", "test3"]:
remove_individual("gomodel:12345", ind_id)
"""
client = get_client()
resp = client.remove_individual(model_id, individual_id)
if resp.validation_failed:
return {
"success": False,
"error": "Validation failed",
"reason": resp.validation_reason,
"rolled_back": True,
"individual_id": individual_id,
"model_id": model_id
}
if resp.error:
return {
"success": False,
"error": resp.error,
"individual_id": individual_id,
"model_id": model_id
}
# Return minimal success response
return {
"success": True,
"removed": True,
"individual_id": individual_id
}
@mcp.tool()
async def remove_fact(
model_id: str,
subject_id: str,
object_id: str,
predicate_id: str
) -> Dict[str, Any]:
"""
Remove a fact from a GO-CAM model.
You must specify the exact triple (subject, predicate, object) to remove.
Args:
model_id: The GO-CAM model identifier
subject_id: Subject of the fact
object_id: Object of the fact
predicate_id: Predicate of the fact
Returns:
Barista API response
Examples:
# Remove an enabled_by relationship
remove_fact(
"gomodel:12345",
"gomodel:12345/mf_123",
"gomodel:12345/gp_456",
"RO:0002333"
)
# Remove a causal relationship
remove_fact(
"gomodel:12345",
"gomodel:12345/activity1",
"gomodel:12345/activity2",
"RO:0002413" # provides input for
)
# Remove occurs_in relationship
remove_fact(
"gomodel:12345",
"gomodel:12345/mf_123",
"gomodel:12345/cc_789",
"BFO:0000066" # occurs_in
)
# Remove using variable references (within same batch)
remove_fact("gomodel:12345", "mf1", "gp1", "RO:0002333")
"""
client = get_client()
resp = client.remove_fact(model_id, subject_id, object_id, predicate_id)
if resp.validation_failed:
return {
"success": False,
"error": "Validation failed",
"reason": resp.validation_reason,
"rolled_back": True,
"fact": {
"subject": subject_id,
"predicate": predicate_id,
"object": object_id
},
"model_id": model_id
}
if resp.error:
return {
"success": False,
"error": resp.error,
"fact": {
"subject": subject_id,
"predicate": predicate_id,
"object": object_id
},
"model_id": model_id
}
# Return minimal success response
return {
"success": True,
"removed": True
}
@mcp.tool()
async def get_model(model_id: str) -> Dict[str, Any]:
"""
Retrieve the full JSON representation of a GO-CAM model.
Args:
model_id: The GO-CAM model identifier
Returns:
Full model data including individuals and facts
Examples:
# Get a production model
model = get_model("gomodel:5fce9b7300001215")
# Returns complete model with:
# - data.id: model ID
# - data.individuals: list of all individuals
# - data.facts: list of all relationships
# - data.annotations: model-level annotations
# Extract specific information
model = get_model("gomodel:12345")
individuals = model["data"]["individuals"]
facts = model["data"]["facts"]
# Find all molecular functions
mfs = [i for i in individuals
if any("GO:0003674" in str(e) for e in i.get("expressions", []))]
# Find all enabled_by relationships
enabled_by = [f for f in facts if f["property"] == "RO:0002333"]
# Check model state
annotations = model["data"].get("annotations", [])
state = next((a["value"] for a in annotations if a["key"] == "state"), None)
"""
client = get_client()
resp = client.get_model(model_id)
if resp.error:
return {
"success": False,
"error": resp.error,
"model_id": model_id
}
# Return structured response with model data
return {
"success": True,
"model_id": model_id,
"data": {
"individuals": resp.individuals,
"facts": resp.facts,
"annotations": resp.annotations if hasattr(resp, 'annotations') else [],
"state": resp.model_state if hasattr(resp, 'model_state') else None
},
"raw": resp.raw # Include raw for backward compatibility
}
@mcp.tool()
async def add_basic_pathway(
model_id: str,
pathway_curie: str,
pathway_label: str,
mf_curie: str,
mf_label: str,
gene_product_curie: str,
gene_product_label: str,
cc_curie: str,
cc_label: str
) -> Dict[str, Any]:
"""
Add a basic GO-CAM pathway unit: MF enabled_by GP, occurs_in CC, part_of BP.
Creates a complete activity unit with all standard relationships, with
label validation to prevent ID errors.
Args:
model_id: The GO-CAM model identifier
pathway_curie: Biological process term ID
pathway_label: Biological process term label
mf_curie: Molecular function term ID
mf_label: Molecular function term label
gene_product_curie: Gene product/protein identifier
gene_product_label: Gene product/protein name
cc_curie: Cellular component term ID
cc_label: Cellular component term label
Returns:
Barista API response. If validation fails, includes error details.
Examples:
# Add a kinase activity in Wnt signaling
add_basic_pathway(
"gomodel:12345",
"GO:0016055", "Wnt signaling pathway",
"GO:0004672", "protein kinase activity",
"UniProtKB:P68400", "CSNK1A1",
"GO:0005737", "cytoplasm"
)
# Add a transcription factor activity
add_basic_pathway(
"gomodel:12345",
"GO:0006355", "regulation of transcription, DNA-templated",
"GO:0003700", "DNA-binding transcription factor activity",
"UniProtKB:Q01094", "E2F1",
"GO:0005634", "nucleus"
)
# Add a receptor activity at membrane
add_basic_pathway(
"gomodel:12345",
"GO:0007165", "signal transduction",
"GO:0004888", "transmembrane signaling receptor activity",
"UniProtKB:P04626", "ERBB2",
"GO:0005886", "plasma membrane"
)
Notes:
- All labels are required to prevent ID hallucination
- Operations are executed with validation and automatic rollback
- If any validation fails, all changes are rolled back
"""
client = get_client()
# Build the batch of requests for the pathway unit
reqs: List[Dict[str, Any]] = []
# Create MF individual
reqs.append(client.req_add_individual(model_id, mf_curie, "mf1"))
# Create GP individual
reqs.append(client.req_add_individual(model_id, gene_product_curie, "gp1"))
# Create BP individual
reqs.append(client.req_add_individual(model_id, pathway_curie, "bp1"))
# Create CC individual
reqs.append(client.req_add_individual(model_id, cc_curie, "cc1"))
# Add relationships
reqs.append(client.req_add_fact(model_id, "mf1", "gp1", "RO:0002333")) # enabled_by
reqs.append(client.req_add_fact(model_id, "mf1", "cc1", "BFO:0000066")) # occurs_in
reqs.append(client.req_add_fact(model_id, "mf1", "bp1", "BFO:0000050")) # part_of
# Execute with validation
expected_individuals = [
{"id": mf_curie, "label": mf_label},
{"id": gene_product_curie, "label": gene_product_label},
{"id": pathway_curie, "label": pathway_label},
{"id": cc_curie, "label": cc_label}
]
resp = client.execute_with_validation(reqs, expected_individuals=expected_individuals)
if resp.validation_failed:
return {
"success": False,
"error": "Validation failed",
"reason": resp.validation_reason,
"rolled_back": True,
"model_id": model_id,
"entities": {
"pathway": {"curie": pathway_curie, "label": pathway_label},
"molecular_function": {"curie": mf_curie, "label": mf_label},
"gene_product": {"curie": gene_product_curie, "label": gene_product_label},
"cellular_component": {"curie": cc_curie, "label": cc_label}
}
}
if resp.error:
return {
"success": False,
"error": resp.error,
"model_id": model_id
}
# Return minimal success response
return {
"success": True,
"pathway_created": True,
"individuals_added": 4,
"facts_added": 3
}
@mcp.tool()
async def add_causal_chain(
model_id: str,
mf1_curie: str,
mf1_label: str,
mf2_curie: str,
mf2_label: str,
gp1_curie: str,
gp1_label: str,
gp2_curie: str,
gp2_label: str,
causal_relation: str = "RO:0002411"
) -> Dict[str, Any]:
"""
Add two molecular functions connected by a causal relationship.
Creates two complete activities and links them causally, with label
validation to prevent ID errors.
Args:
model_id: The GO-CAM model identifier
mf1_curie: First molecular function ID
mf1_label: First molecular function label
mf2_curie: Second molecular function ID
mf2_label: Second molecular function label
gp1_curie: Gene product for first MF
gp1_label: Gene product name for first MF
gp2_curie: Gene product for second MF
gp2_label: Gene product name for second MF
causal_relation: Causal relation (default: RO:0002411 - causally upstream of)
Returns:
Barista API response. If validation fails, includes error details.
Examples:
# Kinase activating another kinase
add_causal_chain(
"gomodel:12345",
"GO:0004674", "protein serine/threonine kinase activity",
"GO:0004674", "protein serine/threonine kinase activity",
"UniProtKB:P31749", "AKT1",
"UniProtKB:P31751", "AKT2",
"RO:0002629" # directly positively regulates
)
# Receptor activating kinase cascade
add_causal_chain(
"gomodel:12345",
"GO:0004888", "transmembrane signaling receptor activity",
"GO:0004674", "protein serine/threonine kinase activity",
"UniProtKB:P04626", "ERBB2",
"UniProtKB:P31749", "AKT1",
"RO:0002411" # causally upstream of
)
# Transcription factor inhibiting another
add_causal_chain(
"gomodel:12345",
"GO:0003700", "DNA-binding transcription factor activity",
"GO:0003700", "DNA-binding transcription factor activity",
"UniProtKB:P01106", "MYC",
"UniProtKB:Q01094", "E2F1",
"RO:0002630" # directly negatively regulates
)
# Common causal relations:
# RO:0002411 - causally upstream of (general)
# RO:0002629 - directly positively regulates
# RO:0002630 - directly negatively regulates
# RO:0002413 - provides input for
# RO:0002407 - indirectly positively regulates
# RO:0002409 - indirectly negatively regulates
# RO:0002304 - causally upstream of, positive effect
# RO:0002305 - causally upstream of, negative effect
Notes:
- All labels are required to prevent ID hallucination
- Operations are executed with validation and automatic rollback
- If any validation fails, all changes are rolled back
"""
client = get_client()
reqs: List[Dict[str, Any]] = []
# First activity
reqs.append(client.req_add_individual(model_id, mf1_curie, "mf1"))
reqs.append(client.req_add_individual(model_id, gp1_curie, "gp1"))
reqs.append(client.req_add_fact(model_id, "mf1", "gp1", "RO:0002333"))
# Second activity
reqs.append(client.req_add_individual(model_id, mf2_curie, "mf2"))
reqs.append(client.req_add_individual(model_id, gp2_curie, "gp2"))
reqs.append(client.req_add_fact(model_id, "mf2", "gp2", "RO:0002333"))
# Causal connection
reqs.append(client.req_add_fact(model_id, "mf1", "mf2", causal_relation))
# Execute with validation
expected_individuals = [
{"id": mf1_curie, "label": mf1_label},
{"id": gp1_curie, "label": gp1_label},
{"id": mf2_curie, "label": mf2_label},
{"id": gp2_curie, "label": gp2_label}
]
resp = client.execute_with_validation(reqs, expected_individuals=expected_individuals)
if resp.validation_failed:
return {
"success": False,
"error": "Validation failed",
"reason": resp.validation_reason,
"rolled_back": True,
"model_id": model_id,
"entities": {
"activity1": {
"molecular_function": {"curie": mf1_curie, "label": mf1_label},
"gene_product": {"curie": gp1_curie, "label": gp1_label}
},
"activity2": {
"molecular_function": {"curie": mf2_curie, "label": mf2_label},
"gene_product": {"curie": gp2_curie, "label": gp2_label}
}
}
}
if resp.error:
return {
"success": False,
"error": resp.error,
"model_id": model_id
}
# Return minimal success response
return {
"success": True,
"causal_chain_created": True,
"activities_added": 2,
"causal_relationship": causal_relation
}
@mcp.tool()
async def model_summary(model_id: str) -> Dict[str, Any]:
"""
Get a summary of a GO-CAM model including counts and key information.
Args:
model_id: The GO-CAM model identifier
Returns:
Summary with individual count, fact count, and predicate distribution
Examples:
# Get summary of a model
result = model_summary("gomodel:5fce9b7300001215")
# Returns:
# {
# "model_id": "gomodel:5fce9b7300001215",
# "state": "production",
# "individual_count": 42,
# "fact_count": 67,
# "predicate_distribution": {
# "RO:0002333": 15, # enabled_by (note: not in vetted list)
# "RO:0002411": 8, # causally upstream of
# "BFO:0000066": 12, # occurs_in
# "BFO:0000050": 5 # part_of
# }
# }
# Check if a model is empty
result = model_summary("gomodel:new_empty_model")
if result["individual_count"] == 0:
print("Model is empty")
# Analyze model complexity
result = model_summary("gomodel:12345")
causal_edges = result["predicate_distribution"].get("RO:0002411", 0)
causal_edges += result["predicate_distribution"].get("RO:0002413", 0) # provides input for
causal_edges += result["predicate_distribution"].get("RO:0002629", 0) # directly positively regulates
causal_edges += result["predicate_distribution"].get("RO:0002630", 0) # directly negatively regulates
print(f"Model has {causal_edges} causal relationships")
"""
client = get_client()
resp = client.get_model(model_id)
if resp.validation_failed:
return {
"success": False,
"error": "Validation failed",
"reason": resp.validation_reason,
"model_id": model_id
}
if resp.error:
return {
"success": False,
"error": "Failed to retrieve model",
"reason": resp.error,
"model_id": model_id
}
# Extract summary information
individuals = resp.individuals
facts = resp.facts
# Count predicates
predicate_counts: Dict[str, int] = {}
for fact in facts:
pred = fact.get("property", "unknown")
predicate_counts[pred] = predicate_counts.get(pred, 0) + 1
# Get model state if available
model_state = resp.model_state
return {
"success": True,
"model_id": model_id,
"state": model_state,
"individual_count": len(individuals),
"fact_count": len(facts),
"predicate_distribution": predicate_counts,
}
@mcp.tool()
async def search_models(
title: Optional[str] = None,
state: Optional[str] = None,
contributor: Optional[str] = None,
group: Optional[str] = None,
pmid: Optional[str] = None,
gene_product: Optional[str] = None,
limit: int = 50,
offset: int = 0
) -> Dict[str, Any]:
"""
Search for GO-CAM models based on various criteria.
Allows searching models by title, state, contributor, group, publication, or gene product.
Returns a list of matching models with their metadata.
Args:
title: Search for models containing this text in their title
state: Filter by model state (production, development, internal_test)
contributor: Filter by contributor ORCID (e.g., 'https://orcid.org/0000-0002-6601-2165')
group: Filter by group/provider (e.g., 'http://www.wormbase.org')
pmid: Filter by PubMed ID (e.g., 'PMID:12345678')
gene_product: Filter by gene product (e.g., 'UniProtKB:Q9BRQ8', 'MGI:MGI:97490')
limit: Maximum number of results to return (default: 50)
offset: Offset for pagination (default: 0)
Returns:
Dictionary containing search results with model metadata
Examples:
# Search for all production models
results = search_models(state="production")
# Find models containing "Wnt signaling" in title
results = search_models(title="Wnt signaling")
# Find models for a specific gene product
results = search_models(gene_product="UniProtKB:P38398")
# Find models from a specific paper
results = search_models(pmid="PMID:30194302")
# Find models by a specific contributor
results = search_models(
contributor="https://orcid.org/0000-0002-6601-2165"
)
# Combine filters
results = search_models(
state="production",
title="kinase",
limit=10
)
# Pagination example
page1 = search_models(limit=50, offset=0)
page2 = search_models(limit=50, offset=50)
# Find models from specific research group
results = search_models(group="http://www.wormbase.org")
# Search for development models with specific gene
results = search_models(
state="development",
gene_product="MGI:MGI:97490"
)
Notes:
- Results include model ID, title, state, contributors, and dates
- Use pagination (offset/limit) for large result sets
- Filters can be combined for more specific searches
- Gene products can be from various databases (UniProt, MGI, RGD, etc.)
"""
client = get_client()
try:
results = client.list_models(
title=title,
state=state,
contributor=contributor,
group=group,
pmid=pmid,
gp=gene_product,
limit=limit,
offset=offset
)
return results
except Exception as e:
return {
"error": "Failed to search models",
"message": str(e)
}
@mcp.tool()
async def search_bioentities(
text: Optional[str] = None,
taxon: Optional[str] = None,
bioentity_type: Optional[str] = None,
source: Optional[str] = None,
limit: int = 10,
offset: int = 0
) -> Dict[str, Any]:
"""
Search for bioentities (genes/proteins) using Gene Ontology data.
Searches across gene and protein names/labels with optional taxonomic filtering.
Provides access to comprehensive bioentity information from GOlr.
Args:
text: Text search across names and labels (e.g., "insulin", "kinase")
taxon: Organism filter - accepts NCBI Taxon ID with or without prefix
(e.g., "9606", "NCBITaxon:9606" for human)
bioentity_type: Type filter (e.g., "protein", "gene")
source: Source database filter (e.g., "UniProtKB", "MGI", "RGD")
limit: Maximum number of results to return (default: 10)
offset: Starting offset for pagination (default: 0)
Returns:
Dictionary containing search results with bioentity information
Examples:
# Search for human insulin proteins
results = search_bioentities(
text="insulin",
taxon="9606",
bioentity_type="protein"
)
# Find mouse kinases from MGI
results = search_bioentities(
text="kinase",
taxon="NCBITaxon:10090",
source="MGI",
limit=20
)
# Search for any human genes/proteins
results = search_bioentities(
taxon="9606",
limit=50
)
# Find specific protein types
results = search_bioentities(
text="receptor",
bioentity_type="protein",
limit=25
)
# Search across all organisms
results = search_bioentities(text="p53")
# Pagination example
page1 = search_bioentities(text="kinase", limit=10, offset=0)
page2 = search_bioentities(text="kinase", limit=10, offset=10)
# Common organisms:
# Human: "9606" or "NCBITaxon:9606"
# Mouse: "10090" or "NCBITaxon:10090"
# Rat: "10116" or "NCBITaxon:10116"
# Fly: "7227" or "NCBITaxon:7227"
# Worm: "6239" or "NCBITaxon:6239"
# Yeast: "559292" or "NCBITaxon:559292"
Notes:
- Results include ID, name, type, organism, and source information
- Text search covers both short names/symbols and full descriptions
- Taxon IDs automatically handle NCBITaxon: prefix normalization
- Use pagination for large result sets
- Sources include UniProtKB, MGI, RGD, ZFIN, SGD, and others
"""
# Normalize taxon ID - add NCBITaxon prefix if just a number
if taxon and not taxon.startswith("NCBITaxon:"):
if taxon.isdigit():
taxon = f"NCBITaxon:{taxon}"
try:
with AmigoClient() as client:
results = client.search_bioentities(
text=text,
taxon=taxon,
bioentity_type=bioentity_type,
source=source,
limit=limit,
offset=offset
)
return {
"results": [
{
"id": result.id,
"label": result.label,
"name": result.name,
"type": result.type,
"taxon": result.taxon,
"taxon_label": result.taxon_label,
"source": result.source
}
for result in results
],
"count": len(results),
"limit": limit,
"offset": offset
}
except Exception as e:
return {
"error": "Failed to search bioentities",
"message": str(e)
}
@mcp.tool()
async def search_annotations(
bioentity: Optional[str] = None,
go_term: Optional[str] = None,
evidence_types: Optional[str] = None,
taxon: Optional[str] = None,
aspect: Optional[str] = None,
assigned_by: Optional[str] = None,
limit: int = 10
) -> Dict[str, Any]:
"""
Search for GO annotations (evidence) with filtering.
Args:
bioentity: Specific bioentity ID to filter by (e.g., "UniProtKB:P12345")
go_term: Specific GO term ID to filter by (e.g., "GO:0008150")
evidence_types: Comma-separated evidence codes (e.g., "IDA,IPI,IMP")
taxon: Organism filter - accepts numeric (9606) or full ID (NCBITaxon:9606)
aspect: GO aspect filter - "C" (cellular component), "F" (molecular function), or "P" (biological process)
assigned_by: Annotation source filter (e.g., "GOC", "UniProtKB", "MGI")
limit: Maximum number of results (default: 10, max: 1000)
Returns:
Dictionary containing:
- annotations: List of annotation results with evidence details
- total: Number of results returned
Examples:
# Find all evidence for a specific protein
search_annotations(bioentity="UniProtKB:P53762")
# Find proteins with experimental evidence for a GO term
search_annotations(go_term="GO:0005634", evidence_types="IDA,IPI")
# Find human proteins in nucleus with experimental evidence
search_annotations(
go_term="GO:0005634",
taxon="9606",
evidence_types="IDA,IPI,IMP",
aspect="C"
)
# Find all UniProt annotations for apoptosis
search_annotations(
go_term="GO:0006915",
assigned_by="UniProtKB"
)
"""
# Normalize taxon ID
if taxon and not taxon.startswith("NCBITaxon:"):
if taxon.isdigit():
taxon = f"NCBITaxon:{taxon}"
# Parse evidence types
evidence_list = None
if evidence_types:
evidence_list = [e.strip() for e in evidence_types.split(",")]
# Limit bounds
limit = min(max(1, limit), 1000)
try:
with AmigoClient() as client:
results = client.search_annotations(
bioentity=bioentity,
go_term=go_term,
evidence_types=evidence_list,
taxon=taxon,
aspect=aspect,
assigned_by=assigned_by,
limit=limit
)
return {
"annotations": [
{
"bioentity": r.bioentity,
"bioentity_label": r.bioentity_label,
"bioentity_name": r.bioentity_name,
"go_term": r.annotation_class,
"go_term_label": r.annotation_class_label,
"aspect": r.aspect,
"evidence_type": r.evidence_type,
"evidence": r.evidence,
"evidence_label": r.evidence_label,
"reference": r.reference,
"assigned_by": r.assigned_by,
"date": r.date,
"taxon": r.taxon,
"taxon_label": r.taxon_label,
"qualifier": r.qualifier,
"annotation_extension": r.annotation_extension
}
for r in results
],
"total": len(results)
}
except Exception as e:
return {
"error": "Failed to search annotations",
"message": str(e)
}
@mcp.tool()
async def get_annotations_for_bioentity(
bioentity_id: str,
go_terms: Optional[str] = None,
evidence_types: Optional[str] = None,
aspect: Optional[str] = None,
limit: int = 100
) -> Dict[str, Any]:
"""
Get all GO annotations (evidence) for a specific bioentity.
Args:
bioentity_id: The bioentity ID (e.g., "UniProtKB:P12345")
go_terms: Comma-separated GO terms to filter (includes child terms)
evidence_types: Comma-separated evidence codes to filter (e.g., "IDA,IPI")
aspect: GO aspect filter - "C", "F", or "P"
limit: Maximum number of results (default: 100)
Returns:
Dictionary containing:
- bioentity_id: The queried bioentity
- annotations: List of annotation results
- summary: Count by aspect and evidence type
Examples:
# Get all annotations for a protein
get_annotations_for_bioentity("UniProtKB:P53762")
# Get only experimental evidence
get_annotations_for_bioentity(
"UniProtKB:P53762",
evidence_types="IDA,IPI,IMP"
)
# Get annotations for specific GO terms
get_annotations_for_bioentity(
"UniProtKB:P53762",
go_terms="GO:0005634,GO:0005737"
)
# Get only molecular function annotations
get_annotations_for_bioentity(
"UniProtKB:P53762",
aspect="F"
)
"""
# Parse comma-separated lists
go_terms_list = None
if go_terms:
go_terms_list = [t.strip() for t in go_terms.split(",")]
evidence_list = None
if evidence_types:
evidence_list = [e.strip() for e in evidence_types.split(",")]
try:
with AmigoClient() as client:
results = client.get_annotations_for_bioentity(
bioentity_id=bioentity_id,
go_terms_closure=go_terms_list,
evidence_types=evidence_list,
aspect=aspect,
limit=limit
)
# Calculate summary statistics
aspect_counts: Dict[str, int] = {}
evidence_counts: Dict[str, int] = {}
for r in results:
aspect_counts[r.aspect] = aspect_counts.get(r.aspect, 0) + 1
evidence_counts[r.evidence_type] = evidence_counts.get(r.evidence_type, 0) + 1
return {
"bioentity_id": bioentity_id,
"annotations": [
{
"go_term": r.annotation_class,
"go_term_label": r.annotation_class_label,
"aspect": r.aspect,
"evidence_type": r.evidence_type,
"evidence": r.evidence,
"evidence_label": r.evidence_label,
"reference": r.reference,
"assigned_by": r.assigned_by,
"date": r.date,
"qualifier": r.qualifier,
"annotation_extension": r.annotation_extension
}
for r in results
],
"summary": {
"total": len(results),
"by_aspect": aspect_counts,
"by_evidence_type": evidence_counts
}
}
except Exception as e:
return {
"error": "Failed to get annotations",
"message": str(e)
}
# Prompts to help users construct common patterns
@mcp.prompt()
def create_basic_activity() -> str:
"""Generate a prompt for creating a basic GO-CAM activity."""
return """To create a basic GO-CAM activity, use:
1. add_individual to create a molecular function instance
2. add_individual to create a gene product instance
3. add_fact with RO:0002333 (enabled_by) to connect them
"""
@mcp.prompt()
def add_evidence_prompt() -> str:
"""Generate a prompt for adding evidence to facts."""
return """To add evidence to a fact:
1. Use add_evidence_to_fact with the fact coordinates
2. Provide an ECO code (e.g., ECO:0000353 for IPI)
3. Include source references (e.g., PMID:12345)
"""
if __name__ == "__main__":
mcp.run()