Noctua MCP Server

Official

Overview Schema Related Servers Score Discussions

noctua-mcp
src
noctua_mcp

mcp_server.py•63.7 KiB

""" Noctua MCP Server ================= Thin MCP wrapper around gocam-ai library for GO-CAM model manipulation. This server exposes GO-CAM editing capabilities through the Model Context Protocol. Security & credentials ---------------------- - Set BARISTA_TOKEN in environment before launch - The token is used by the underlying gocam-ai BaristaClient Transport --------- - Runs via stdio transport by default - Launch with: uvx noctua-mcp """ from typing import Any, Dict, List, Optional from pathlib import Path from fastmcp import FastMCP from noctua import BaristaClient from noctua.amigo import AmigoClient from noctua.models import ProteinComplexComponent, EntitySetMember # Path to guidelines directory GUIDELINES_DIR = Path(__file__).parent / "guidelines" mcp = FastMCP( "noctua-mcp", instructions=""" Noctua MCP Server provides tools for editing GO-CAM models via the Barista API. Use these tools to create and edit GO-CAM models with individuals, facts, and evidence. This also provides tools for finding relevant genes and gene products (entities) for use in the model. Available operations: - Create new empty GO-CAM models - Add individuals (instances) of GO/ECO terms - Add facts (edges) between individuals - Add evidence to facts - Add protein complexes with validated components - Add entity sets (functionally interchangeable entities) - Remove individuals and facts - Query model structure - Search for bioentities and annotations """ ) # Create a module-level client instance (will be lazily initialized) _client: Optional[BaristaClient] = None def get_client() -> BaristaClient: """Get or create the Barista client instance.""" global _client if _client is None: _client = BaristaClient() return _client @mcp.tool() async def configure_token(token: str) -> Dict[str, Any]: """ Configure the Barista authentication token. Args: token: The Barista authentication token Returns: Success status """ import os global _client # Set environment variable os.environ["BARISTA_TOKEN"] = token # Reset client to pick up new token _client = None return { "success": True, "configured": True } @mcp.tool() async def create_model( title: Optional[str] = None ) -> Dict[str, Any]: """ Create a new empty GO-CAM model. Args: title: Optional title for the model Returns: Barista API response containing the new model ID and editor URLs Examples: - `create_model("RAS-RAF signaling pathway")` Notes: - The returned model_id can be used with other tools like add_individual - Models are created in "development" state by default - To add taxon information, use add_individual after creating the model """ client = get_client() resp = client.create_model(title=title) if resp.validation_failed: return { "success": False, "error": "Validation failed", "reason": resp.validation_reason, "rolled_back": True } if resp.error: return { "success": False, "error": "Operation failed", "reason": resp.error } # Build minimal response result = { "success": True, "model_id": resp.model_id, "created": True } # Add editor URLs if we have a model ID if resp.model_id: import os token = os.environ.get("BARISTA_TOKEN", "") # Graph editor with token result["graph_editor_url"] = f"http://noctua-dev.berkeleybop.org/editor/graph/{resp.model_id}?barista_token={token}" # Pathway editor without token (URL encoded model ID) from urllib.parse import quote encoded_id = quote(resp.model_id, safe="") result["pathway_editor_url"] = f"http://noctua-dev.berkeleybop.org/workbench/noctua-visual-pathway-editor/?model_id={encoded_id}" return result @mcp.tool() async def add_individual( model_id: str, class_curie: str, class_label: str, assign_var: str = "x1" ) -> Dict[str, Any]: """ Add an individual (instance) of a class to a GO-CAM model with label validation. This tool requires providing the expected label for the class to prevent accidental use of wrong IDs (e.g., GO:0003924 vs GO:0003925). The operation will automatically rollback if the created individual doesn't match the expected label. Args: model_id: The GO-CAM model identifier (e.g., "gomodel:12345") class_curie: The class to instantiate (e.g., "GO:0003674") class_label: The expected rdfs:label of the class (e.g., "molecular_function") assign_var: Variable name for referencing in the same batch Returns: Barista API response with message-type and signal fields. If validation fails, includes rolled_back=true and validation error. Examples: # Add a molecular function activity with validation add_individual("gomodel:12345", "GO:0004672", "protein kinase activity", "mf1") # Add a protein/gene product with validation add_individual("gomodel:12345", "UniProtKB:P38398", "BRCA1", "gp1") # Add a cellular component with validation add_individual("gomodel:12345", "GO:0005737", "cytoplasm", "cc1") # Add a biological process with validation add_individual("gomodel:12345", "GO:0016055", "Wnt signaling pathway", "bp1") # Add an evidence instance with validation add_individual("gomodel:12345", "ECO:0000353", "physical interaction evidence", "ev1") # Variables like "mf1", "gp1" can be referenced in subsequent # add_fact calls within the same batch operation Notes: - The label acts as a checksum to prevent ID hallucination - If the label doesn't match, the operation is automatically rolled back - This prevents corrupt models from incorrect IDs """ client = get_client() resp = client.add_individual(model_id, class_curie, assign_var, expected_label=class_label) if resp.validation_failed: return { "success": False, "error": "Validation failed", "reason": resp.validation_reason, "rolled_back": True, "expected_label": class_label, "class_curie": class_curie } if resp.error: return { "success": False, "error": resp.error, "model_id": model_id, "class_curie": class_curie } # Get the actual individual ID from model_vars or from the individuals list individual_id = assign_var if resp.model_vars and assign_var in resp.model_vars: individual_id = resp.model_vars[assign_var] elif resp.individuals and len(resp.individuals) > 0: # If model_vars is empty, get the ID from the last individual (the one just created) individual_id = resp.individuals[-1].id # Return minimal success response return { "success": True, "individual_id": individual_id, "class_curie": class_curie, "assign_var": assign_var } @mcp.tool() async def add_fact( model_id: str, subject_id: str, object_id: str, predicate_id: str ) -> Dict[str, Any]: """ Add a fact (edge/relation) between two individuals in a model. Args: model_id: The GO-CAM model identifier subject_id: Subject individual ID or variable object_id: Object individual ID or variable predicate_id: Relation predicate (e.g., "RO:0002333" for enabled_by) Returns: Barista API response Examples: # Connect molecular function to gene product (enabled_by) add_fact("gomodel:12345", "mf1", "gp1", "RO:0002333") # Connect molecular function to cellular component (occurs_in) add_fact("gomodel:12345", "mf1", "cc1", "BFO:0000066") # Connect molecular function to biological process (part_of) add_fact("gomodel:12345", "mf1", "bp1", "BFO:0000050") # Add causal relationship between activities add_fact("gomodel:12345", "mf1", "mf2", "RO:0002411") # causally upstream of add_fact("gomodel:12345", "mf1", "mf2", "RO:0002629") # directly positively regulates add_fact("gomodel:12345", "mf1", "mf2", "RO:0002630") # directly negatively regulates add_fact("gomodel:12345", "mf1", "mf2", "RO:0002413") # provides input for # Add regulates relationships add_fact("gomodel:12345", "mf1", "bp1", "RO:0002211") # regulates add_fact("gomodel:12345", "mf1", "bp1", "RO:0002213") # positively regulates add_fact("gomodel:12345", "mf1", "bp1", "RO:0002212") # negatively regulates # Add indirect regulation relationships add_fact("gomodel:12345", "mf1", "mf2", "RO:0002407") # indirectly positively regulates add_fact("gomodel:12345", "mf1", "mf2", "RO:0002409") # indirectly negatively regulates # Add causal relationships with effects add_fact("gomodel:12345", "mf1", "mf2", "RO:0002304") # causally upstream of, positive effect add_fact("gomodel:12345", "mf1", "mf2", "RO:0002305") # causally upstream of, negative effect # Add small molecule regulation relationships add_fact("gomodel:12345", "sm1", "mf1", "RO:0012005") # is small molecule activator of add_fact("gomodel:12345", "sm1", "mf1", "RO:0012006") # is small molecule inhibitor of # Use with existing individual IDs from model add_fact("gomodel:12345", "gomodel:12345/abc123", "gomodel:12345/def456", "RO:0002333") """ client = get_client() # Resolve any variables to actual IDs resolved_subject = client._resolve_identifier(model_id, subject_id) resolved_object = client._resolve_identifier(model_id, object_id) req = client.req_add_fact(model_id, resolved_subject, resolved_object, predicate_id) resp = client.m3_batch([req]) if resp.validation_failed: return { "success": False, "error": "Validation failed", "reason": resp.validation_reason, "rolled_back": True, "fact": { "subject": subject_id, "predicate": predicate_id, "object": object_id } } if resp.error: return { "success": False, "error": resp.error, "model_id": model_id, "fact": { "subject": subject_id, "predicate": predicate_id, "object": object_id } } # Return minimal success response return { "success": True, "fact_added": True } @mcp.tool() async def add_evidence_to_fact( model_id: str, subject_id: str, object_id: str, predicate_id: str, eco_id: str, sources: List[str], with_from: Optional[List[str]] = None ) -> Dict[str, Any]: """ Add evidence to an existing fact in a GO-CAM model. Args: model_id: The GO-CAM model identifier subject_id: Subject of the fact object_id: Object of the fact predicate_id: Predicate of the fact eco_id: Evidence code (e.g., "ECO:0000353") sources: List of source references (e.g., ["PMID:12345"]) with_from: Optional list of with/from references Returns: Barista API response Examples: # Add experimental evidence from a paper add_evidence_to_fact( "gomodel:12345", "mf1", "gp1", "RO:0002333", "ECO:0000353", # physical interaction evidence ["PMID:12345678"] ) # Add multiple sources add_evidence_to_fact( "gomodel:12345", "mf1", "gp1", "RO:0002333", "ECO:0000314", # direct assay evidence ["PMID:12345678", "PMID:87654321", "doi:10.1234/example"] ) # Add evidence with with/from (e.g., for IPI) add_evidence_to_fact( "gomodel:12345", "mf1", "gp1", "RO:0002333", "ECO:0000353", # IPI ["PMID:12345678"], ["UniProtKB:Q9Y6K9", "UniProtKB:P38398"] # interacting partners ) # Common evidence codes: # ECO:0000314 - direct assay evidence # ECO:0000353 - physical interaction evidence (IPI) # ECO:0000315 - mutant phenotype evidence (IMP) # ECO:0000316 - genetic interaction evidence (IGI) # ECO:0000318 - biological aspect of ancestor evidence (IBA) # ECO:0000269 - experimental evidence """ client = get_client() reqs = client.req_add_evidence_to_fact( model_id, subject_id, object_id, predicate_id, eco_id, sources, with_from ) resp = client.m3_batch(reqs) if resp.validation_failed: return { "success": False, "error": "Validation failed", "reason": resp.validation_reason, "rolled_back": True, "fact": { "subject": subject_id, "predicate": predicate_id, "object": object_id }, "evidence": { "eco_id": eco_id, "sources": sources, "with_from": with_from } } if resp.error: return { "success": False, "error": resp.error, "model_id": model_id, "fact": { "subject": subject_id, "predicate": predicate_id, "object": object_id } } # Return minimal success response return { "success": True, "evidence_added": True, "eco_id": eco_id } @mcp.tool() async def add_protein_complex( model_id: str, components: List[Dict[str, Any]], assign_var: str = "complex1" ) -> Dict[str, Any]: """ Add a protein complex to a GO-CAM model with validated components. Creates a protein-containing complex (GO:0032991 by default) and links all components using BFO:0000051 (has part) relation. The operation is atomic - either all components are added successfully or the entire operation is rolled back. Args: model_id: The GO-CAM model identifier components: List of component dictionaries with keys: - entity_id (required): Protein/gene product ID (e.g., "UniProtKB:P12345") - label (optional): Component label for validation - evidence_type (optional): ECO code (e.g., "ECO:0000353") - reference (optional): Source reference (e.g., "PMID:12345678") assign_var: Variable name for the complex (default: "complex1") Returns: Barista API response with complex ID and component IDs Examples: # Create a simple dimer complex add_protein_complex( "gomodel:12345", [ {"entity_id": "UniProtKB:P04637", "label": "TP53"}, {"entity_id": "UniProtKB:P04637", "label": "TP53"} ], ) # Create complex with evidence add_protein_complex( "gomodel:12345", [ { "entity_id": "UniProtKB:P68400", "label": "CSNK1A1", "evidence_type": "ECO:0000353", "reference": "PMID:12345678" }, { "entity_id": "UniProtKB:P49841", "label": "GSK3B", "evidence_type": "ECO:0000353", "reference": "PMID:12345678" } ], assign_var="destruction_complex" ) # Create a complex with specific assignment variable add_protein_complex( "gomodel:12345", [ {"entity_id": "UniProtKB:P62191", "label": "PSMC1"}, {"entity_id": "UniProtKB:P62195", "label": "PSMC5"} ], assign_var="proteasome" ) Notes: - All components must have entity_id specified - Label validation prevents ID hallucination - Evidence and references are optional but recommended - Uses BFO:0000051 (has part) to link components - Atomic operation with automatic rollback on failure """ client = get_client() # Convert component dicts to Pydantic models try: pydantic_components = [ProteinComplexComponent(**comp) for comp in components] except Exception as e: return { "success": False, "error": "Invalid component structure", "reason": str(e), "hint": "Each component must have 'entity_id' field. Optional: 'label', 'evidence_type', 'reference'" } # Call the new add_protein_complex method resp = client.add_protein_complex( model_id, pydantic_components, assign_var=assign_var ) if resp.validation_failed: return { "success": False, "error": "Validation failed", "reason": resp.validation_reason, "rolled_back": True, "component_count": len(components) } if resp.error: return { "success": False, "error": resp.error, "model_id": model_id } # Get the complex ID from model_vars or from the individuals list complex_id = assign_var if resp.model_vars and assign_var in resp.model_vars: complex_id = resp.model_vars[assign_var] elif resp.individuals and len(resp.individuals) > 0: # Find the complex individual (it should be the first one created in this operation) # The complex is created first, then components are added for ind in resp.individuals: # Check if it's a protein-containing complex if hasattr(ind, 'type') and any('GO:0032991' in str(t.id) if hasattr(t, 'id') else False for t in ind.type): complex_id = ind.id break return { "success": True, "complex_id": complex_id, "component_count": len(components), "assign_var": assign_var } @mcp.tool() async def add_entity_set( model_id: str, members: List[Dict[str, Any]], assign_var: str = "set1" ) -> Dict[str, Any]: """ Add an entity set to a GO-CAM model with validated members. Creates an entity set (CHEBI:33695 "information biomacromolecule" by default) representing functionally interchangeable entities. Links members using RO:0019003 (has substitutable entity) relation. The operation is atomic - either all members are added successfully or the entire operation is rolled back. Args: model_id: The GO-CAM model identifier members: List of member dictionaries with keys: - entity_id (required): Entity ID (e.g., "UniProtKB:P12345") - label (optional): Member label for validation - evidence_type (optional): ECO code (e.g., "ECO:0000353") - reference (optional): Source reference (e.g., "PMID:12345678") assign_var: Variable name for the set (default: "set1") Returns: Barista API response with set ID and member IDs Examples: # Create a set of functionally equivalent kinases add_entity_set( "gomodel:12345", [ {"entity_id": "UniProtKB:P31749", "label": "AKT1"}, {"entity_id": "UniProtKB:P31751", "label": "AKT2"}, {"entity_id": "UniProtKB:Q9Y243", "label": "AKT3"} ], assign_var="akt_isoforms" ) # Create set with evidence add_entity_set( "gomodel:12345", [ { "entity_id": "UniProtKB:P04637", "label": "TP53", "evidence_type": "ECO:0000314", "reference": "PMID:87654321" }, { "entity_id": "UniProtKB:P04049", "label": "RAF1", "evidence_type": "ECO:0000314", "reference": "PMID:87654321" } ], ) Notes: - All members must have entity_id specified - Label validation prevents ID hallucination - Evidence and references are optional but recommended - Uses RO:0019003 (has substitutable entity) to link members - Atomic operation with automatic rollback on failure - Entity sets represent functionally interchangeable entities """ client = get_client() # Convert member dicts to Pydantic models try: pydantic_members = [EntitySetMember(**member) for member in members] except Exception as e: return { "success": False, "error": "Invalid member structure", "reason": str(e), "hint": "Each member must have 'entity_id' field. Optional: 'label', 'evidence_type', 'reference'" } # Call the new add_entity_set method resp = client.add_entity_set( model_id, pydantic_members, assign_var=assign_var, ) if resp.validation_failed: return { "success": False, "error": "Validation failed", "reason": resp.validation_reason, "rolled_back": True, "member_count": len(members) } if resp.error: return { "success": False, "error": resp.error, "model_id": model_id, } # Get the set ID from model_vars or from the individuals list set_id = assign_var if resp.model_vars and assign_var in resp.model_vars: set_id = resp.model_vars[assign_var] elif resp.individuals and len(resp.individuals) > 0: # Find the set individual - it should be the first one created # Look for an entity set (typically CHEBI:33695 or similar) for ind in resp.individuals: if hasattr(ind, 'type') and any('CHEBI' in str(t.id) if hasattr(t, 'id') else False for t in ind.type): set_id = ind.id break return { "success": True, "set_id": set_id, "member_count": len(members), "assign_var": assign_var } @mcp.tool() async def remove_individual( model_id: str, individual_id: str ) -> Dict[str, Any]: """ Remove an individual from a GO-CAM model. Note: This will also remove all facts (edges) connected to this individual. Args: model_id: The GO-CAM model identifier individual_id: The individual to remove Returns: Barista API response Examples: # Remove using a variable reference (within same batch) remove_individual("gomodel:12345", "mf1") # Remove using full individual ID remove_individual("gomodel:12345", "gomodel:12345/5fce9b7300001215") # Remove an evidence individual remove_individual("gomodel:12345", "gomodel:12345/evidence_123") # Clean up after testing for ind_id in ["test1", "test2", "test3"]: remove_individual("gomodel:12345", ind_id) """ client = get_client() resp = client.remove_individual(model_id, individual_id) if resp.validation_failed: return { "success": False, "error": "Validation failed", "reason": resp.validation_reason, "rolled_back": True, "individual_id": individual_id, "model_id": model_id } if resp.error: return { "success": False, "error": resp.error, "individual_id": individual_id, "model_id": model_id } # Return minimal success response return { "success": True, "removed": True, "individual_id": individual_id } @mcp.tool() async def remove_fact( model_id: str, subject_id: str, object_id: str, predicate_id: str ) -> Dict[str, Any]: """ Remove a fact from a GO-CAM model. You must specify the exact triple (subject, predicate, object) to remove. Args: model_id: The GO-CAM model identifier subject_id: Subject of the fact object_id: Object of the fact predicate_id: Predicate of the fact Returns: Barista API response Examples: # Remove an enabled_by relationship remove_fact( "gomodel:12345", "gomodel:12345/mf_123", "gomodel:12345/gp_456", "RO:0002333" ) # Remove a causal relationship remove_fact( "gomodel:12345", "gomodel:12345/activity1", "gomodel:12345/activity2", "RO:0002413" # provides input for ) # Remove occurs_in relationship remove_fact( "gomodel:12345", "gomodel:12345/mf_123", "gomodel:12345/cc_789", "BFO:0000066" # occurs_in ) # Remove using variable references (within same batch) remove_fact("gomodel:12345", "mf1", "gp1", "RO:0002333") """ client = get_client() resp = client.remove_fact(model_id, subject_id, object_id, predicate_id) if resp.validation_failed: return { "success": False, "error": "Validation failed", "reason": resp.validation_reason, "rolled_back": True, "fact": { "subject": subject_id, "predicate": predicate_id, "object": object_id }, "model_id": model_id } if resp.error: return { "success": False, "error": resp.error, "fact": { "subject": subject_id, "predicate": predicate_id, "object": object_id }, "model_id": model_id } # Return minimal success response return { "success": True, "removed": True } @mcp.tool() async def get_model(model_id: str) -> Dict[str, Any]: """ Retrieve the full JSON representation of a GO-CAM model. Args: model_id: The GO-CAM model identifier Returns: Full model data including individuals and facts Examples: # Get a production model model = get_model("gomodel:5fce9b7300001215") # Returns complete model with: # - data.id: model ID # - data.individuals: list of all individuals # - data.facts: list of all relationships # - data.annotations: model-level annotations # Extract specific information model = get_model("gomodel:12345") individuals = model["data"]["individuals"] facts = model["data"]["facts"] # Find all molecular functions mfs = [i for i in individuals if any("GO:0003674" in str(e.id) for e in i.type if hasattr(e, 'id'))] # Find all enabled_by relationships (facts are Pydantic objects) enabled_by = [f for f in facts if f.property == "RO:0002333"] # Check model state state = model["data"].get("state") """ client = get_client() resp = client.get_model(model_id) if resp.error: return { "success": False, "error": resp.error, "model_id": model_id } # Return structured response with model data return { "success": True, "model_id": model_id, "data": { "individuals": resp.individuals, "facts": resp.facts, "annotations": resp.annotations if hasattr(resp, 'annotations') else [], "state": resp.model_state if hasattr(resp, 'model_state') else None }, "raw": resp.raw # Include raw for backward compatibility } # NOTE: The following function is intentionally commented out and preserved for future reference. # BUG DESCRIPTION: # The add_activity_unit tool is currently buggy: it can create duplicate activity units in the GO-CAM model # due to incomplete rollback logic when an error occurs during multi-step model updates. # Specifically, if any part of the pathway unit creation fails, partial changes may remain, # resulting in duplicate molecular function (MF), gene product (GP), or cellular component (CC) individuals or facts. # # FUTURE PLANS: # - Refactor the function to ensure atomicity: either all steps succeed, or all changes are rolled back. # - Add checks to prevent duplicate individuals and relationships. # - Consider using transactions or temporary model states if supported by the Barista API. # # Until these issues are resolved, this function should not be enabled. #@mcp.tool() async def add_activity_unit( model_id: str, pathway_curie: str, pathway_label: str, mf_curie: str, mf_label: str, gene_product_curie: str, gene_product_label: str, cc_curie: str, cc_label: str ) -> Dict[str, Any]: """ Add a basic GO-CAM pathway unit: MF enabled_by GP, occurs_in CC, part_of BP. Creates a complete activity unit with all standard relationships, with label validation to prevent ID errors. Args: model_id: The GO-CAM model identifier pathway_curie: Biological process term ID pathway_label: Biological process term label mf_curie: Molecular function term ID mf_label: Molecular function term label gene_product_curie: Gene product/protein identifier gene_product_label: Gene product/protein name cc_curie: Cellular component term ID cc_label: Cellular component term label Returns: Barista API response. If validation fails, includes error details. Examples: # Add a kinase activity in Wnt signaling add_basic_pathway( "gomodel:12345", "GO:0016055", "Wnt signaling pathway", "GO:0004672", "protein kinase activity", "UniProtKB:P68400", "CSNK1A1", "GO:0005737", "cytoplasm" ) # Add a transcription factor activity add_basic_pathway( "gomodel:12345", "GO:0006355", "regulation of transcription, DNA-templated", "GO:0003700", "DNA-binding transcription factor activity", "UniProtKB:Q01094", "E2F1", "GO:0005634", "nucleus" ) # Add a receptor activity at membrane add_basic_pathway( "gomodel:12345", "GO:0007165", "signal transduction", "GO:0004888", "transmembrane signaling receptor activity", "UniProtKB:P04626", "ERBB2", "GO:0005886", "plasma membrane" ) Notes: - All labels are required to prevent ID hallucination - Operations are executed with validation and automatic rollback - If any validation fails, all changes are rolled back """ client = get_client() # Build the batch of requests for the pathway unit reqs: List[Dict[str, Any]] = [] # Create MF individual reqs.append(client.req_add_individual(model_id, mf_curie, "mf1")) # Create GP individual reqs.append(client.req_add_individual(model_id, gene_product_curie, "gp1")) # Create BP individual reqs.append(client.req_add_individual(model_id, pathway_curie, "bp1")) # Create CC individual reqs.append(client.req_add_individual(model_id, cc_curie, "cc1")) # Add relationships reqs.append(client.req_add_fact(model_id, "mf1", "gp1", "RO:0002333")) # enabled_by reqs.append(client.req_add_fact(model_id, "mf1", "cc1", "BFO:0000066")) # occurs_in reqs.append(client.req_add_fact(model_id, "mf1", "bp1", "BFO:0000050")) # part_of # Execute with validation expected_individuals = [ {"id": mf_curie, "label": mf_label}, {"id": gene_product_curie, "label": gene_product_label}, {"id": pathway_curie, "label": pathway_label}, {"id": cc_curie, "label": cc_label} ] resp = client.execute_with_validation(reqs, expected_individuals=expected_individuals) if resp.validation_failed: return { "success": False, "error": "Validation failed", "reason": resp.validation_reason, "rolled_back": True, "model_id": model_id, "entities": { "pathway": {"curie": pathway_curie, "label": pathway_label}, "molecular_function": {"curie": mf_curie, "label": mf_label}, "gene_product": {"curie": gene_product_curie, "label": gene_product_label}, "cellular_component": {"curie": cc_curie, "label": cc_label} } } if resp.error: return { "success": False, "error": resp.error, "model_id": model_id } # Return minimal success response return { "success": True, "pathway_created": True, "individuals_added": 4, "facts_added": 3 } # NOTE: This function is currently commented out and not registered as an MCP tool due to a known bug. # BUG DESCRIPTION: # - The function can create duplicate molecular function and gene product individuals in the GO-CAM model. # - This occurs when a partial rollback happens after a validation failure or error during the multi-step creation process. # - Not all changes are reliably reverted, so some individuals or facts may remain, leading to duplicate entries if the function is retried. # - The bug is most likely to occur when the model is in a partially valid state or when network/database errors interrupt the process. # - This function is retained in the codebase for future debugging and as a reference for the intended causal chain creation logic. # - Do NOT uncomment or register this function as a tool until the rollback logic is fixed and thoroughly tested. # - See issue tracker #<insert-issue-number-if-applicable> for more details and progress on resolving this bug. #@mcp.tool() async def add_causal_chain( model_id: str, mf1_curie: str, mf1_label: str, mf2_curie: str, mf2_label: str, gp1_curie: str, gp1_label: str, gp2_curie: str, gp2_label: str, causal_relation: str = "RO:0002411" ) -> Dict[str, Any]: """ Add two molecular functions connected by a causal relationship. Creates two complete activities and links them causally, with label validation to prevent ID errors. Args: model_id: The GO-CAM model identifier mf1_curie: First molecular function ID mf1_label: First molecular function label mf2_curie: Second molecular function ID mf2_label: Second molecular function label gp1_curie: Gene product for first MF gp1_label: Gene product name for first MF gp2_curie: Gene product for second MF gp2_label: Gene product name for second MF causal_relation: Causal relation (default: RO:0002411 - causally upstream of) Returns: Barista API response. If validation fails, includes error details. Examples: # Kinase activating another kinase add_causal_chain( "gomodel:12345", "GO:0004674", "protein serine/threonine kinase activity", "GO:0004674", "protein serine/threonine kinase activity", "UniProtKB:P31749", "AKT1", "UniProtKB:P31751", "AKT2", "RO:0002629" # directly positively regulates ) # Receptor activating kinase cascade add_causal_chain( "gomodel:12345", "GO:0004888", "transmembrane signaling receptor activity", "GO:0004674", "protein serine/threonine kinase activity", "UniProtKB:P04626", "ERBB2", "UniProtKB:P31749", "AKT1", "RO:0002411" # causally upstream of ) # Transcription factor inhibiting another add_causal_chain( "gomodel:12345", "GO:0003700", "DNA-binding transcription factor activity", "GO:0003700", "DNA-binding transcription factor activity", "UniProtKB:P01106", "MYC", "UniProtKB:Q01094", "E2F1", "RO:0002630" # directly negatively regulates ) # Common causal relations: # RO:0002411 - causally upstream of (general) # RO:0002629 - directly positively regulates # RO:0002630 - directly negatively regulates # RO:0002413 - provides input for # RO:0002407 - indirectly positively regulates # RO:0002409 - indirectly negatively regulates # RO:0002304 - causally upstream of, positive effect # RO:0002305 - causally upstream of, negative effect Notes: - All labels are required to prevent ID hallucination - Operations are executed with validation and automatic rollback - If any validation fails, all changes are rolled back """ client = get_client() reqs: List[Dict[str, Any]] = [] # First activity reqs.append(client.req_add_individual(model_id, mf1_curie, "mf1")) reqs.append(client.req_add_individual(model_id, gp1_curie, "gp1")) reqs.append(client.req_add_fact(model_id, "mf1", "gp1", "RO:0002333")) # Second activity reqs.append(client.req_add_individual(model_id, mf2_curie, "mf2")) reqs.append(client.req_add_individual(model_id, gp2_curie, "gp2")) reqs.append(client.req_add_fact(model_id, "mf2", "gp2", "RO:0002333")) # Causal connection reqs.append(client.req_add_fact(model_id, "mf1", "mf2", causal_relation)) # Execute with validation expected_individuals = [ {"id": mf1_curie, "label": mf1_label}, {"id": gp1_curie, "label": gp1_label}, {"id": mf2_curie, "label": mf2_label}, {"id": gp2_curie, "label": gp2_label} ] resp = client.execute_with_validation(reqs, expected_individuals=expected_individuals) if resp.validation_failed: return { "success": False, "error": "Validation failed", "reason": resp.validation_reason, "rolled_back": True, "model_id": model_id, "entities": { "activity1": { "molecular_function": {"curie": mf1_curie, "label": mf1_label}, "gene_product": {"curie": gp1_curie, "label": gp1_label} }, "activity2": { "molecular_function": {"curie": mf2_curie, "label": mf2_label}, "gene_product": {"curie": gp2_curie, "label": gp2_label} } } } if resp.error: return { "success": False, "error": resp.error, "model_id": model_id } # Return minimal success response return { "success": True, "causal_chain_created": True, "activities_added": 2, "causal_relationship": causal_relation } @mcp.tool() async def model_summary(model_id: str) -> Dict[str, Any]: """ Get a summary of a GO-CAM model including counts and key information. Args: model_id: The GO-CAM model identifier Returns: Summary with individual count, fact count, and predicate distribution Examples: # Get summary of a model result = model_summary("gomodel:5fce9b7300001215") # Returns: # { # "model_id": "gomodel:5fce9b7300001215", # "state": "production", # "individual_count": 42, # "fact_count": 67, # "predicate_distribution": { # "RO:0002333": 15, # enabled_by (note: not in vetted list) # "RO:0002411": 8, # causally upstream of # "BFO:0000066": 12, # occurs_in # "BFO:0000050": 5 # part_of # } # } # Check if a model is empty result = model_summary("gomodel:new_empty_model") if result["individual_count"] == 0: print("Model is empty") # Analyze model complexity result = model_summary("gomodel:12345") causal_edges = result["predicate_distribution"].get("RO:0002411", 0) causal_edges += result["predicate_distribution"].get("RO:0002413", 0) # provides input for causal_edges += result["predicate_distribution"].get("RO:0002629", 0) # directly positively regulates causal_edges += result["predicate_distribution"].get("RO:0002630", 0) # directly negatively regulates print(f"Model has {causal_edges} causal relationships") """ client = get_client() resp = client.get_model(model_id) if resp.validation_failed: return { "success": False, "error": "Validation failed", "reason": resp.validation_reason, "model_id": model_id } if resp.error: return { "success": False, "error": "Failed to retrieve model", "reason": resp.error, "model_id": model_id } # Extract summary information individuals = resp.individuals facts = resp.facts # Count predicates predicate_counts: Dict[str, int] = {} for fact in facts: # fact is now a Pydantic Fact object, not a dict pred = fact.property if hasattr(fact, 'property') else "unknown" predicate_counts[pred] = predicate_counts.get(pred, 0) + 1 # Get model state if available model_state = resp.model_state return { "success": True, "model_id": model_id, "state": model_state, "individual_count": len(individuals), "fact_count": len(facts), "predicate_distribution": predicate_counts, } @mcp.tool() async def get_model_variables(model_id: str) -> Dict[str, Any]: """ Get the currently bound variables for a GO-CAM model. Returns a mapping of variable names to their actual individual IDs. This is useful for understanding what variables are available in the current model context, especially after batch operations. Args: model_id: The GO-CAM model identifier Returns: Dictionary with variable mappings and model information Examples: # Get variables after creating individuals vars = get_model_variables("gomodel:12345") # Returns: # { # "model_id": "gomodel:12345", # "variables": { # "mf1": "gomodel:12345/68dee4d300000481", # "gp1": "gomodel:12345/68dee4d300000482", # "cc1": "gomodel:12345/68dee4d300000483" # }, # "individual_count": 3 # } # Use the variables in subsequent operations vars = get_model_variables("gomodel:12345") mf_id = vars["variables"]["mf1"] add_fact("gomodel:12345", mf_id, vars["variables"]["gp1"], "RO:0002333") Notes: - Variables are only valid within the same batch operation - This tool helps identify actual IDs for cross-batch operations - If the model has no tracked variables, returns empty dict """ client = get_client() # Check if the client has variable tracking enabled if not hasattr(client, 'track_variables') or not client.track_variables: client.track_variables = True # Get the model to see current state resp = client.get_model(model_id) if resp.error: return { "success": False, "error": resp.error, "model_id": model_id } # Get variables from the client's registry variables = {} if hasattr(client, '_variable_registry') and client._variable_registry: # The registry is keyed by (model_id, variable_name) -> actual_id # We need to extract variables for this specific model variables = client.get_variables(model_id) # Also check if the last response has model_vars if hasattr(resp, 'model_vars') and resp.model_vars: variables.update(resp.model_vars) # Count individuals individual_count = len(resp.individuals) if resp.individuals else 0 return { "success": True, "model_id": model_id, "variables": variables, "individual_count": individual_count, } @mcp.tool() async def search_models( title: Optional[str] = None, state: Optional[str] = None, contributor: Optional[str] = None, group: Optional[str] = None, pmid: Optional[str] = None, gene_product: Optional[str] = None, limit: int = 50, offset: int = 0 ) -> Dict[str, Any]: """ Search for GO-CAM models based on various criteria. Allows searching models by title, state, contributor, group, publication, or gene product. Returns a list of matching models with their metadata. Args: title: Search for models containing this text in their title state: Filter by model state (production, development, internal_test) contributor: Filter by contributor ORCID (e.g., 'https://orcid.org/0000-0002-6601-2165') group: Filter by group/provider (e.g., 'http://www.wormbase.org') pmid: Filter by PubMed ID (e.g., 'PMID:12345678') gene_product: Filter by gene product (e.g., 'UniProtKB:Q9BRQ8', 'MGI:MGI:97490') limit: Maximum number of results to return (default: 50) offset: Offset for pagination (default: 0) Returns: Dictionary containing search results with model metadata Examples: # Search for all production models results = search_models(state="production") # Find models containing "Wnt signaling" in title results = search_models(title="Wnt signaling") # Find models for a specific gene product results = search_models(gene_product="UniProtKB:P38398") # Find models from a specific paper results = search_models(pmid="PMID:30194302") # Find models by a specific contributor results = search_models( contributor="https://orcid.org/0000-0002-6601-2165" ) # Combine filters results = search_models( state="production", title="kinase", limit=10 ) # Pagination example page1 = search_models(limit=50, offset=0) page2 = search_models(limit=50, offset=50) # Find models from specific research group results = search_models(group="http://www.wormbase.org") # Search for development models with specific gene results = search_models( state="development", gene_product="MGI:MGI:97490" ) Notes: - Results include model ID, title, state, contributors, and dates - Use pagination (offset/limit) for large result sets - Filters can be combined for more specific searches - Gene products can be from various databases (UniProt, MGI, RGD, etc.) """ client = get_client() try: results = client.list_models( title=title, state=state, contributor=contributor, group=group, pmid=pmid, gp=gene_product, limit=limit, offset=offset ) return results except Exception as e: return { "error": "Failed to search models", "message": str(e) } @mcp.tool() async def search_bioentities( text: Optional[str] = None, taxon: Optional[str] = None, bioentity_type: Optional[str] = None, source: Optional[str] = None, limit: int = 10, offset: int = 0 ) -> Dict[str, Any]: """ Search for bioentities (genes/proteins) using Gene Ontology data. Searches across gene and protein names/labels with optional taxonomic filtering. Provides access to comprehensive bioentity information from GOlr. Args: text: Text search across names and labels (e.g., "insulin", "kinase") taxon: Organism filter - accepts NCBI Taxon ID with or without prefix (e.g., "9606", "NCBITaxon:9606" for human) bioentity_type: Type filter (e.g., "protein", "gene") source: Source database filter (e.g., "UniProtKB", "MGI", "RGD") limit: Maximum number of results to return (default: 10) offset: Starting offset for pagination (default: 0) Returns: Dictionary containing search results with bioentity information Examples: # Search for human insulin proteins results = search_bioentities( text="insulin", taxon="9606", bioentity_type="protein" ) # Find mouse kinases from MGI results = search_bioentities( text="kinase", taxon="NCBITaxon:10090", source="MGI", limit=20 ) # Search for any human genes/proteins results = search_bioentities( taxon="9606", limit=50 ) # Find specific protein types results = search_bioentities( text="receptor", bioentity_type="protein", limit=25 ) # Search across all organisms results = search_bioentities(text="p53") # Pagination example page1 = search_bioentities(text="kinase", limit=10, offset=0) page2 = search_bioentities(text="kinase", limit=10, offset=10) # Common organisms: # Human: "9606" or "NCBITaxon:9606" # Mouse: "10090" or "NCBITaxon:10090" # Rat: "10116" or "NCBITaxon:10116" # Fly: "7227" or "NCBITaxon:7227" # Worm: "6239" or "NCBITaxon:6239" # Yeast: "559292" or "NCBITaxon:559292" Notes: - Results include ID, name, type, organism, and source information - Text search covers both short names/symbols and full descriptions - Taxon IDs automatically handle NCBITaxon: prefix normalization - Use pagination for large result sets - Sources include UniProtKB, MGI, RGD, ZFIN, SGD, and others """ # Normalize taxon ID - add NCBITaxon prefix if just a number if taxon and not taxon.startswith("NCBITaxon:"): if taxon.isdigit(): taxon = f"NCBITaxon:{taxon}" try: with AmigoClient() as client: results = client.search_bioentities( text=text, taxon=taxon, bioentity_type=bioentity_type, source=source, limit=limit, offset=offset ) return { "results": [ { "id": result.id, "label": result.label, "name": result.name, "type": result.type, "taxon": result.taxon, "taxon_label": result.taxon_label, "source": result.source } for result in results ], "count": len(results), "limit": limit, "offset": offset } except Exception as e: return { "error": "Failed to search bioentities", "message": str(e) } @mcp.tool() async def search_annotations( bioentity: Optional[str] = None, go_term: Optional[str] = None, evidence_types: Optional[str] = None, taxon: Optional[str] = None, aspect: Optional[str] = None, assigned_by: Optional[str] = None, limit: int = 10 ) -> Dict[str, Any]: """ Search for GO annotations (evidence) with filtering. Args: bioentity: Specific bioentity ID to filter by (e.g., "UniProtKB:P12345") go_term: Specific GO term ID to filter by (e.g., "GO:0008150") evidence_types: Comma-separated evidence codes (e.g., "IDA,IPI,IMP") taxon: Organism filter - accepts numeric (9606) or full ID (NCBITaxon:9606) aspect: GO aspect filter - "C" (cellular component), "F" (molecular function), or "P" (biological process) assigned_by: Annotation source filter (e.g., "GOC", "UniProtKB", "MGI") limit: Maximum number of results (default: 10, max: 1000) Returns: Dictionary containing: - annotations: List of annotation results with evidence details - total: Number of results returned Examples: # Find all evidence for a specific protein search_annotations(bioentity="UniProtKB:P53762") # Find proteins with experimental evidence for a GO term search_annotations(go_term="GO:0005634", evidence_types="IDA,IPI") # Find human proteins in nucleus with experimental evidence search_annotations( go_term="GO:0005634", taxon="9606", evidence_types="IDA,IPI,IMP", aspect="C" ) # Find all UniProt annotations for apoptosis search_annotations( go_term="GO:0006915", assigned_by="UniProtKB" ) """ # Normalize taxon ID if taxon and not taxon.startswith("NCBITaxon:"): if taxon.isdigit(): taxon = f"NCBITaxon:{taxon}" # Parse evidence types evidence_list = None if evidence_types: evidence_list = [e.strip() for e in evidence_types.split(",")] # Limit bounds limit = min(max(1, limit), 1000) try: with AmigoClient() as client: results = client.search_annotations( bioentity=bioentity, go_term=go_term, evidence_types=evidence_list, taxon=taxon, aspect=aspect, assigned_by=assigned_by, limit=limit ) return { "annotations": [ { "bioentity": r.bioentity, "bioentity_label": r.bioentity_label, "bioentity_name": r.bioentity_name, "go_term": r.annotation_class, "go_term_label": r.annotation_class_label, "aspect": r.aspect, "evidence_type": r.evidence_type, "evidence": r.evidence, "evidence_label": r.evidence_label, "reference": r.reference, "assigned_by": r.assigned_by, "date": r.date, "taxon": r.taxon, "taxon_label": r.taxon_label, "qualifier": r.qualifier, "annotation_extension": r.annotation_extension } for r in results ], "total": len(results) } except Exception as e: return { "error": "Failed to search annotations", "message": str(e) } @mcp.tool() async def get_annotations_for_bioentity( bioentity_id: str, go_terms: Optional[str] = None, evidence_types: Optional[str] = None, aspect: Optional[str] = None, limit: int = 100 ) -> Dict[str, Any]: """ Get all GO annotations (evidence) for a specific bioentity. Args: bioentity_id: The bioentity ID (e.g., "UniProtKB:P12345") go_terms: Comma-separated GO terms to filter (includes child terms) evidence_types: Comma-separated evidence codes to filter (e.g., "IDA,IPI") aspect: GO aspect filter - "C", "F", or "P" limit: Maximum number of results (default: 100) Returns: Dictionary containing: - bioentity_id: The queried bioentity - annotations: List of annotation results - summary: Count by aspect and evidence type Examples: # Get all annotations for a protein get_annotations_for_bioentity("UniProtKB:P53762") # Get only experimental evidence get_annotations_for_bioentity( "UniProtKB:P53762", evidence_types="IDA,IPI,IMP" ) # Get annotations for specific GO terms get_annotations_for_bioentity( "UniProtKB:P53762", go_terms="GO:0005634,GO:0005737" ) # Get only molecular function annotations get_annotations_for_bioentity( "UniProtKB:P53762", aspect="F" ) """ # Parse comma-separated lists go_terms_list = None if go_terms: go_terms_list = [t.strip() for t in go_terms.split(",")] evidence_list = None if evidence_types: evidence_list = [e.strip() for e in evidence_types.split(",")] try: with AmigoClient() as client: results = client.get_annotations_for_bioentity( bioentity_id=bioentity_id, go_terms_closure=go_terms_list, evidence_types=evidence_list, aspect=aspect, limit=limit ) # Calculate summary statistics aspect_counts: Dict[str, int] = {} evidence_counts: Dict[str, int] = {} for r in results: aspect_counts[r.aspect] = aspect_counts.get(r.aspect, 0) + 1 evidence_counts[r.evidence_type] = evidence_counts.get(r.evidence_type, 0) + 1 return { "bioentity_id": bioentity_id, "annotations": [ { "go_term": r.annotation_class, "go_term_label": r.annotation_class_label, "aspect": r.aspect, "evidence_type": r.evidence_type, "evidence": r.evidence, "evidence_label": r.evidence_label, "reference": r.reference, "assigned_by": r.assigned_by, "date": r.date, "qualifier": r.qualifier, "annotation_extension": r.annotation_extension } for r in results ], "summary": { "total": len(results), "by_aspect": aspect_counts, "by_evidence_type": evidence_counts } } except Exception as e: return { "error": "Failed to get annotations", "message": str(e) } # Prompts to help users construct common patterns @mcp.prompt() def create_basic_activity() -> str: """Generate a prompt for creating a basic GO-CAM activity.""" return """To create a basic GO-CAM activity, use: 1. add_individual to create a molecular function instance 2. add_individual to create a gene product instance 3. add_fact with RO:0002333 (enabled_by) to connect them """ @mcp.prompt() def add_evidence_prompt() -> str: """Generate a prompt for adding evidence to facts.""" return """To add evidence to a fact: 1. Use add_evidence_to_fact with the fact coordinates 2. Provide an ECO code (e.g., ECO:0000353 for IPI) 3. Include source references (e.g., PMID:12345) """ # Resources for GO-CAM guidelines def _get_available_guidelines() -> List[str]: """Get list of available guideline files.""" if not GUIDELINES_DIR.exists(): return [] return sorted([f.stem for f in GUIDELINES_DIR.glob("*.md")]) def _inject_guideline_list(content: str, title: str) -> str: """Inject list of available guidelines at the end of content.""" guidelines = _get_available_guidelines() if not guidelines: return content # Create formatted list guideline_section = "\n\n## Available GO-CAM Guidelines\n\n" guideline_section += f"This is the '{title}' guideline. Other available guidelines include:\n\n" for guide in guidelines: # Skip the current one if guide == title: continue # Make the filename more readable readable_name = guide.replace("_", " ").replace("-", " ") guideline_section += f"- {readable_name}\n" guideline_section += "\nUse the `get_guideline_content` tool to access any specific guideline." return content + guideline_section @mcp.resource("guidelines://modeling-best-practices") async def get_modeling_guidelines() -> str: """GO-CAM modeling best practices and general guidelines.""" # Try multiple possible filenames possible_files = [ "GO-CAM_annotation_guidelines_README.md", "GO-CAM_modelling_guidelines_TO_DO.md", "WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md" ] for filename in possible_files: file_path = GUIDELINES_DIR / filename if file_path.exists(): with open(file_path) as f: content = f.read() return _inject_guideline_list(content, file_path.stem) # If none found, return a generic message with list return _inject_guideline_list( "# GO-CAM Modeling Guidelines\n\nNo main guideline file found.", "modeling-best-practices" ) @mcp.resource("guidelines://evidence-requirements") async def get_evidence_guidelines() -> str: """Evidence code usage and requirements for GO-CAM.""" # Look for E3 ubiquitin ligases as it contains evidence info file_path = GUIDELINES_DIR / "E3_ubiquitin_ligases.md" if file_path.exists(): with open(file_path) as f: content = f.read() return _inject_guideline_list(content, file_path.stem) # Fallback with list return _inject_guideline_list( "# Evidence Requirements\n\nNo evidence guideline file found.", "evidence-requirements" ) @mcp.resource("guidelines://complex-annotations") async def get_complex_guidelines() -> str: """Guidelines for annotating protein complexes in GO-CAM.""" file_path = GUIDELINES_DIR / "How_to_annotate_complexes_in_GO-CAM.md" if file_path.exists(): with open(file_path) as f: content = f.read() return _inject_guideline_list(content, file_path.stem) return _inject_guideline_list( "# Complex Annotation Guidelines\n\nNo complex guideline file found.", "complex-annotations" ) @mcp.tool() async def list_guidelines() -> Dict[str, Any]: """List all available GO-CAM guideline documents. Returns a list of available guideline names that can be accessed using the get_guideline_content tool. Returns: Dictionary with 'guidelines' key containing list of available guidelines Examples: # List all available guidelines result = list_guidelines() for guide in result['guidelines']: print(guide) """ guidelines = _get_available_guidelines() return { "guidelines": guidelines, "count": len(guidelines), "note": "Use get_guideline_content(guideline_name) to fetch any guideline" } @mcp.tool() async def get_guideline_content(guideline_name: str) -> Dict[str, Any]: """Fetch specific GO-CAM guideline content. Args: guideline_name: Name of guideline file (without .md extension). Use list_guidelines() to see available options. Returns: Dictionary with guideline content or error message Examples: # Get a specific guideline content = get_guideline_content("E3_ubiquitin_ligases") # Get transcription factor guidelines content = get_guideline_content("DNA-binding_transcription_factor_activity_annotation_guidelines") """ file_path = GUIDELINES_DIR / f"{guideline_name}.md" if not file_path.exists(): available = _get_available_guidelines() return { "success": False, "error": f"Guideline '{guideline_name}' not found", "available_guidelines": available, "hint": "Use one of the available guideline names listed above" } try: with open(file_path) as f: content = f.read() # Extract first heading as description lines = content.split('\n') description = "" for line in lines: if line.strip(): description = line.strip('#').strip() break return { "success": True, "guideline_name": guideline_name, "description": description, "content": content, "length": len(content) } except Exception as e: return { "success": False, "error": f"Failed to read guideline: {str(e)}", "guideline_name": guideline_name } if __name__ == "__main__": mcp.run()

Loading blob content...

Implementation Reference

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/geneontology/noctua-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

mcp_server.py•63.7 KiB