schema_info:
title: Gene Ontology (GO)
description: |
The Gene Ontology provides a controlled vocabulary of terms for describing gene and gene product attributes across all organisms. GO terms are organized into three ontology domains: biological_process (30,804 terms describing biological objectives), molecular_function (12,793 terms describing molecular activities), and cellular_component (4,568 terms describing cellular locations). Each term includes definitions, synonyms, hierarchical relationships, and cross-references to external databases. The ontology enables standardized annotation of gene products and supports computational analysis of biological data.
endpoint: https://rdfportal.org/primary/sparql
base_uri: http://purl.obolibrary.org/obo/
graphs:
- http://rdfportal.org/ontology/go
- http://purl.jp/bio/11/goa
version:
mie_version: '1.0'
mie_created: '2025-01-27'
data_version: Latest GO Release
update_frequency: Monthly
license:
data_license: Creative Commons Attribution 4.0 International (CC BY 4.0)
license_url: https://creativecommons.org/licenses/by/4.0/
access:
rate_limiting: 100 queries/min
max_query_timeout: 60 seconds
# ShEx shape for a GO term node. Properties written without a cardinality
# marker (type, id, label, definition IAO_0000115, namespace) occur exactly
# once; those marked `*` may repeat or be absent (parents, synonyms, xrefs,
# subsets, alternative ids); those marked `?` are optional (deprecated flag,
# comment).
shape_expressions: |
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX oboinowl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
<GOTermShape> {
a [ owl:Class ] ;
oboinowl:id xsd:string ;
rdfs:label xsd:string ;
obo:IAO_0000115 xsd:string ;
oboinowl:hasOBONamespace [ "biological_process" "molecular_function" "cellular_component" "external" ] ;
rdfs:subClassOf IRI * ;
oboinowl:hasExactSynonym xsd:string * ;
oboinowl:hasRelatedSynonym xsd:string * ;
oboinowl:hasNarrowSynonym xsd:string * ;
oboinowl:hasBroadSynonym xsd:string * ;
oboinowl:hasDbXref xsd:string * ;
oboinowl:inSubset IRI * ;
owl:deprecated xsd:boolean ? ;
oboinowl:hasAlternativeId xsd:string * ;
rdfs:comment xsd:string ?
}
sample_rdf_entries:
# Turtle sample: a biological_process term with id, label, definition,
# namespace and a single rdfs:subClassOf parent (GO_0006325).
- title: Biological Process Term - Chromatin Remodeling
description: A biological process term describing dynamic chromatin reorganization. Includes hierarchical relationships.
rdf: |
@prefix obo: <http://purl.obolibrary.org/obo/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix oboinowl: <http://www.geneontology.org/formats/oboInOwl#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
obo:GO_0006338 a owl:Class ;
oboinowl:id "GO:0006338" ;
rdfs:label "chromatin remodeling" ;
obo:IAO_0000115 "A dynamic process of chromatin reorganization resulting in changes to chromatin structure." ;
oboinowl:hasOBONamespace "biological_process" ;
rdfs:subClassOf obo:GO_0006325 .
# Turtle sample for GO:0005634 (nucleus).
# - The owl: prefix is declared because the snippet uses owl:Class.
# - The subset IRI is written in full: '#' cannot appear unescaped in a
#   prefixed name (it would start a Turtle comment and swallow the final '.').
- title: Cellular Component Term - Nucleus
  description: A cellular component term with multiple subsets and cross-references. Shows extensive synonym coverage and integration with GO slim subsets.
  rdf: |
    @prefix obo: <http://purl.obolibrary.org/obo/> .
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
    @prefix oboinowl: <http://www.geneontology.org/formats/oboInOwl#> .
    @prefix owl: <http://www.w3.org/2002/07/owl#> .
    obo:GO_0005634 a owl:Class ;
      oboinowl:id "GO:0005634" ;
      rdfs:label "nucleus" ;
      obo:IAO_0000115 "A membrane-bounded organelle of eukaryotic cells in which chromosomes are housed and replicated." ;
      oboinowl:hasOBONamespace "cellular_component" ;
      rdfs:subClassOf obo:GO_0043231 ;
      oboinowl:hasExactSynonym "cell nucleus" ;
      oboinowl:hasNarrowSynonym "horsetail nucleus" ;
      oboinowl:hasDbXref "NIF_Subcellular:sao1702920020" ;
      oboinowl:hasDbXref "Wikipedia:Cell_nucleus" ;
      oboinowl:inSubset <http://purl.obolibrary.org/obo/go#goslim_generic> .
# Turtle sample for GO:0004672 (protein kinase activity).
# The owl: prefix is declared because the snippet uses owl:Class; without it
# the sample does not parse as Turtle.
- title: Molecular Function Term - Protein Kinase Activity
  description: A molecular function term describing catalytic activity with reaction details in the definition.
  rdf: |
    @prefix obo: <http://purl.obolibrary.org/obo/> .
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
    @prefix oboinowl: <http://www.geneontology.org/formats/oboInOwl#> .
    @prefix owl: <http://www.w3.org/2002/07/owl#> .
    obo:GO_0004672 a owl:Class ;
      oboinowl:id "GO:0004672" ;
      rdfs:label "protein kinase activity" ;
      obo:IAO_0000115 "Catalysis of the phosphorylation of an amino acid residue in a protein, usually according to the reaction: a protein + ATP = a phosphoprotein + ADP." ;
      oboinowl:hasOBONamespace "molecular_function" .
# Turtle sample for GO:0006935 (chemotaxis) showing a hasDbXref literal.
# The rdfs: and owl: prefixes are declared because the snippet uses
# rdfs:label and owl:Class.
- title: Term with External Cross-References
  description: GO term linked to external databases including Wikipedia for integrated knowledge access.
  rdf: |
    @prefix obo: <http://purl.obolibrary.org/obo/> .
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
    @prefix oboinowl: <http://www.geneontology.org/formats/oboInOwl#> .
    @prefix owl: <http://www.w3.org/2002/07/owl#> .
    obo:GO_0006935 a owl:Class ;
      oboinowl:id "GO:0006935" ;
      rdfs:label "chemotaxis" ;
      oboinowl:hasDbXref "Wikipedia:Chemotaxis" .
# Turtle sample: an obsolete term, flagged with owl:deprecated true and
# carrying "obsolete" / "OBSOLETE." markers in its label and definition.
- title: Obsolete Term with Deprecation Flag
description: Example of deprecated GO term showing how obsolete terms are marked in the ontology.
rdf: |
@prefix obo: <http://purl.obolibrary.org/obo/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
obo:GO_0005623 a owl:Class ;
rdfs:label "obsolete cell" ;
owl:deprecated true ;
obo:IAO_0000115 "OBSOLETE. The basic structural and functional unit of all organisms." .
sparql_query_examples:
# Basic example: full-text match on rdfs:label via bif:contains, scoped to
# the GO graph with FROM and restricted to GO_ term IRIs with STRSTARTS.
- title: Search GO terms by keyword
description: Find GO terms matching specific keywords using full-text search
question: Find all GO terms related to "apoptosis"
complexity: basic
sparql: |
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?go ?label
FROM <http://rdfportal.org/ontology/go>
WHERE {
?go rdfs:label ?label .
?label bif:contains "'apoptosis'" .
FILTER(STRSTARTS(STR(?go), "http://purl.obolibrary.org/obo/GO_"))
}
LIMIT 20
# Basic example: select terms whose hasOBONamespace value, compared through
# STR(), equals "molecular_function".
- title: Filter GO terms by namespace
description: Retrieve terms from a specific GO domain
question: What are some molecular function terms?
complexity: basic
sparql: |
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX oboinowl: <http://www.geneontology.org/formats/oboInOwl#>
SELECT DISTINCT ?go ?label ?namespace
FROM <http://rdfportal.org/ontology/go>
WHERE {
?go rdfs:label ?label .
?go oboinowl:hasOBONamespace ?namespace .
FILTER(STR(?namespace) = "molecular_function")
FILTER(STRSTARTS(STR(?go), "http://purl.obolibrary.org/obo/GO_"))
}
LIMIT 10
# Intermediate example: combines a bif:contains label search with the
# STR()-based namespace filter and also returns each term's definition.
- title: Search with keyword filtering and namespace
description: Combine keyword search with namespace filtering for targeted results
question: Find kinase-related molecular functions
complexity: intermediate
sparql: |
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX oboinowl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
SELECT DISTINCT ?go ?label ?definition ?namespace
FROM <http://rdfportal.org/ontology/go>
WHERE {
?go rdfs:label ?label .
?go obo:IAO_0000115 ?definition .
?go oboinowl:hasOBONamespace ?namespace .
?label bif:contains "'kinase'" .
FILTER(STR(?namespace) = "molecular_function")
FILTER(STRSTARTS(STR(?go), "http://purl.obolibrary.org/obo/GO_"))
}
LIMIT 10
# Intermediate example: full-text search over definitions (IAO_0000115)
# using a boolean AND and a trailing-* wildcard inside bif:contains.
- title: Search definitions with complex keywords
description: Use boolean operators in full-text search across definitions
question: Find terms related to both mitochondria and transport
complexity: intermediate
sparql: |
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
SELECT DISTINCT ?go ?label ?definition
FROM <http://rdfportal.org/ontology/go>
WHERE {
?go rdfs:label ?label .
?go obo:IAO_0000115 ?definition .
?definition bif:contains "('mitochondri*' AND 'transport')" .
FILTER(STRSTARTS(STR(?go), "http://purl.obolibrary.org/obo/GO_"))
}
LIMIT 10
# Intermediate example: dump every predicate/object pair of one GO term.
# DISTINCT is used because this file's own guidance says duplicate rows are
# common at this endpoint and every SELECT should deduplicate.
- title: Retrieve GO term with all properties
  description: Get complete information about a specific GO term including all annotations
  question: What are all the properties of GO:0005634 (nucleus)?
  complexity: intermediate
  sparql: |
    PREFIX obo: <http://purl.obolibrary.org/obo/>
    SELECT DISTINCT ?property ?value
    FROM <http://rdfportal.org/ontology/go>
    WHERE {
      obo:GO_0005634 ?property ?value .
    }
    LIMIT 50
# Advanced example: direct parents of one GO term via rdfs:subClassOf.
# - ?child is bound up front with VALUES instead of being matched for every
#   term and filtered afterwards.
# - DISTINCT follows this file's guidance on duplicate rows.
# - The STRSTARTS filter keeps only parents that are GO term IRIs.
- title: Find hierarchical relationships
  description: Query parent-child relationships between GO terms
  question: What are the parent terms of chromatin remodeling?
  complexity: advanced
  sparql: |
    PREFIX obo: <http://purl.obolibrary.org/obo/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT DISTINCT ?child ?childLabel ?parent ?parentLabel
    FROM <http://rdfportal.org/ontology/go>
    WHERE {
      VALUES ?child { obo:GO_0006338 }
      ?child rdfs:subClassOf ?parent .
      ?child rdfs:label ?childLabel .
      ?parent rdfs:label ?parentLabel .
      FILTER(STRSTARTS(STR(?parent), "http://purl.obolibrary.org/obo/GO_"))
    }
# Advanced example: counts distinct GO term IRIs per hasOBONamespace value,
# largest namespace first.
- title: Count terms by namespace
description: Aggregate statistics showing distribution of GO terms across namespaces
question: How many terms are in each namespace?
complexity: advanced
sparql: |
PREFIX oboinowl: <http://www.geneontology.org/formats/oboInOwl#>
SELECT ?namespace (COUNT(DISTINCT ?go) as ?count)
FROM <http://rdfportal.org/ontology/go>
WHERE {
?go oboinowl:hasOBONamespace ?namespace .
FILTER(STRSTARTS(STR(?go), "http://purl.obolibrary.org/obo/GO_"))
}
GROUP BY ?namespace
ORDER BY DESC(?count)
cross_references:
# Cross-reference pattern: hasDbXref literals of the form "<Database>:<id>".
# The sample query selects Wikipedia xrefs with STRSTARTS rather than an
# anchored REGEX: it is a plain prefix test, and this file's performance
# notes advise against REGEX on this endpoint.
- pattern: oboinowl:hasDbXref
  description: |
    GO terms include cross-references to external databases via hasDbXref property. These links connect GO terms to complementary resources including Wikipedia for general knowledge, pathway databases (Reactome, KEGG), disease/phenotype ontologies (MESH, SNOMEDCT, NCIt), and structural databases (NIF_Subcellular). Cross-references enable integrated analysis across biological knowledge bases.
  databases:
    general_knowledge:
      - Wikipedia: extensive coverage
    pathway_databases:
      - Reactome: biochemical pathways
      - KEGG_REACTION: metabolic reactions
      - RHEA: enzyme reactions
      - EC: enzyme classification
    disease_phenotype:
      - MESH: medical subject headings
      - SNOMEDCT: clinical terminology
      - NCIt: cancer terminology
    structural:
      - NIF_Subcellular: subcellular structures
  sparql: |
    PREFIX oboinowl: <http://www.geneontology.org/formats/oboInOwl#>
    SELECT DISTINCT ?go ?xref
    FROM <http://rdfportal.org/ontology/go>
    WHERE {
      ?go oboinowl:hasDbXref ?xref .
      FILTER(STRSTARTS(STR(?go), "http://purl.obolibrary.org/obo/GO_"))
      FILTER(STRSTARTS(STR(?xref), "Wikipedia:"))
    }
    LIMIT 20
architectural_notes:
schema_design:
- GO terms organized into three independent namespaces (biological_process, molecular_function, cellular_component) plus external
- Hierarchical structure using rdfs:subClassOf for parent-child relationships forming directed acyclic graphs (DAGs)
- Each term has unique GO ID (oboinowl:id) in format "GO:NNNNNNN"
- Extensive synonym system with four types (exact, related, narrow, broad) for comprehensive term matching
- Subset system (inSubset) provides organism-specific and application-specific GO slim views
performance:
- CRITICAL - Always use FROM clause to specify graph
- Use bif:contains for keyword filtering instead of REGEX for better performance on Virtuoso backend
- Use STR() for namespace comparisons to avoid datatype mismatch issues
- Filter by namespace early in queries to reduce result set size
- Restrict subjects with a STRSTARTS filter on the GO_ IRI prefix so that terms from other ontologies are not processed
- Aggregation queries may time out due to large dataset size; use LIMIT appropriately
data_integration:
- External database cross-references (hasDbXref) connect to 20+ resources
- Many GO terms return duplicate rows in results; use DISTINCT to deduplicate
data_quality:
- Obsolete terms marked with owl:deprecated true flag
- Alternative IDs (hasAlternativeId) track merged term history
- Definitions (IAO_0000115) required for all non-obsolete terms
- Created_by and creation_date properties track term provenance
data_statistics:
total_go_terms: 48165
terms_by_namespace:
biological_process: 30804
molecular_function: 12793
cellular_component: 4568
external: 11
coverage:
terms_with_definitions: ~100%
terms_with_synonyms: ~80%
terms_with_cross_references: ~52%
deprecated_terms: ~25%
cardinality:
avg_synonyms_per_term: 1.6
avg_xrefs_per_term: 0.5
avg_subsets_per_term: 0.1
performance_characteristics:
- Simple label searches: <1 second
- Keyword searches with bif:contains: 1-3 seconds
- Hierarchical queries: 1-5 seconds
- Aggregation queries: may time out, use LIMIT
data_quality_notes:
- Duplicate rows common in results due to multiple graph storage
- Always use DISTINCT in SELECT queries
- FILTER by GO_ prefix to exclude other ontology terms
anti_patterns:
# Anti-pattern pair: the two queries differ only by the FROM clause that
# scopes the query to the GO graph.
- title: Missing FROM clause
problem: Queries timeout or return no results without FROM clause
wrong_sparql: |
SELECT ?go ?label
WHERE {
?go rdfs:label ?label .
}
correct_sparql: |
SELECT ?go ?label
FROM <http://rdfportal.org/ontology/go>
WHERE {
?go rdfs:label ?label .
}
explanation: CRITICAL - FROM clause is required for GO queries at this endpoint. Without it, queries fail or timeout.
# Anti-pattern pair: the two queries differ only in wrapping ?namespace in
# STR(). Both declare their prefixes, include the FROM clause this file
# marks as CRITICAL, and bind ?label so every projected variable has a value.
- title: Namespace filter without STR()
  problem: Datatype mismatch causes namespace filters to fail
  wrong_sparql: |
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX oboinowl: <http://www.geneontology.org/formats/oboInOwl#>
    SELECT ?go ?label ?namespace
    FROM <http://rdfportal.org/ontology/go>
    WHERE {
      ?go rdfs:label ?label .
      ?go oboinowl:hasOBONamespace ?namespace .
      FILTER(?namespace = "molecular_function")
    }
  correct_sparql: |
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX oboinowl: <http://www.geneontology.org/formats/oboInOwl#>
    SELECT ?go ?label ?namespace
    FROM <http://rdfportal.org/ontology/go>
    WHERE {
      ?go rdfs:label ?label .
      ?go oboinowl:hasOBONamespace ?namespace .
      FILTER(STR(?namespace) = "molecular_function")
    }
  explanation: Use STR() to convert namespace values to plain strings for comparison to avoid datatype mismatch issues.
# Anti-pattern pair: the two queries differ only in REGEX vs bif:contains.
# Both declare rdfs: and include the FROM clause this file marks as
# CRITICAL, so the "correct" query does not itself show another anti-pattern.
- title: Using REGEX instead of bif:contains for keyword search
  problem: REGEX is slow on large text fields and doesn't leverage Virtuoso's full-text indexing
  wrong_sparql: |
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?go ?label
    FROM <http://rdfportal.org/ontology/go>
    WHERE {
      ?go rdfs:label ?label .
      FILTER(REGEX(?label, "kinase", "i"))
    }
  correct_sparql: |
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?go ?label
    FROM <http://rdfportal.org/ontology/go>
    WHERE {
      ?go rdfs:label ?label .
      ?label bif:contains "'kinase'" .
    }
  explanation: bif:contains uses Virtuoso's full-text index and is 10-100x faster than REGEX for keyword matching
# Anti-pattern pair: the two queries differ only in the STRSTARTS filter on
# the GO_ IRI prefix (the variable is renamed ?term -> ?go to match).
# Both declare rdfs: and include the FROM clause this file marks as CRITICAL.
- title: Forgetting to filter by GO_ prefix
  problem: Returns terms from other ontologies (PRO, CHEBI, etc.) mixed with GO terms
  wrong_sparql: |
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?term ?label
    FROM <http://rdfportal.org/ontology/go>
    WHERE {
      ?term rdfs:label ?label .
      ?label bif:contains "'apoptosis'" .
    }
  correct_sparql: |
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?go ?label
    FROM <http://rdfportal.org/ontology/go>
    WHERE {
      ?go rdfs:label ?label .
      ?label bif:contains "'apoptosis'" .
      FILTER(STRSTARTS(STR(?go), "http://purl.obolibrary.org/obo/GO_"))
    }
  explanation: The GO graph contains terms from multiple OBO ontologies; always filter by GO_ prefix to get only GO terms
# Anti-pattern pair: the two queries differ only in the DISTINCT keyword.
# Both declare their prefixes and include the FROM clause this file marks
# as CRITICAL.
- title: Not using DISTINCT with duplicate results
  problem: GO terms stored in multiple graphs cause duplicate rows in results
  wrong_sparql: |
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX oboinowl: <http://www.geneontology.org/formats/oboInOwl#>
    SELECT ?go ?label ?namespace
    FROM <http://rdfportal.org/ontology/go>
    WHERE {
      ?go rdfs:label ?label .
      ?go oboinowl:hasOBONamespace ?namespace .
    }
  correct_sparql: |
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX oboinowl: <http://www.geneontology.org/formats/oboInOwl#>
    SELECT DISTINCT ?go ?label ?namespace
    FROM <http://rdfportal.org/ontology/go>
    WHERE {
      ?go rdfs:label ?label .
      ?go oboinowl:hasOBONamespace ?namespace .
    }
  explanation: Always use DISTINCT to remove duplicate rows caused by multiple graph storage
common_errors:
# Troubleshooting entry for timeouts and empty result sets.
# The example fix declares the oboinowl: prefix it uses and counts DISTINCT
# term IRIs, so duplicate rows do not inflate the per-namespace totals.
- error: Query timeout or no results
  causes:
    - Missing FROM clause (CRITICAL)
    - Aggregating over all 48,000+ GO terms without filters
    - Complex COUNT or GROUP BY without LIMIT
  solutions:
    - Always add FROM <http://rdfportal.org/ontology/go>
    - Add namespace filter to reduce dataset size
    - Use LIMIT to cap result size
    - Filter by specific GO term ranges or keywords first
  example_fix: |
    # CRITICAL: Always include FROM clause
    PREFIX oboinowl: <http://www.geneontology.org/formats/oboInOwl#>
    SELECT ?namespace (COUNT(DISTINCT ?go) as ?count)
    FROM <http://rdfportal.org/ontology/go>
    WHERE {
      ?go oboinowl:hasOBONamespace ?namespace .
      FILTER(STRSTARTS(STR(?go), "http://purl.obolibrary.org/obo/GO_"))
    }
    GROUP BY ?namespace
# Troubleshooting entry for namespace filters that match nothing; the
# example fix contrasts a direct literal comparison with the STR() form.
- error: Empty results with namespace filter
causes:
- Datatype mismatch in namespace comparison
- Querying wrong graph or missing FROM clause
- Combining filters that exclude all results
solutions:
- CRITICAL - Use STR() for namespace comparisons
- 'Verify namespace values: ''biological_process'', ''molecular_function'', ''cellular_component'' (not ''BP'', ''MF'', ''CC'')'
- Check that GO term has the property being filtered
- Test namespace filter separately before combining with other filters
example_fix: |
# Wrong:
FILTER(?namespace = "molecular_function")
# Correct:
FILTER(STR(?namespace) = "molecular_function")
# Troubleshooting entry for bif:contains search expressions.
# The advice is worded to match the examples below: each term is
# single-quoted inside the double-quoted expression, and a trailing *
# matches words that start with the given stem.
- error: Boolean operators not working in bif:contains
  causes:
    - Incorrect syntax for AND/OR/NOT operators
    - Missing quotes or parentheses
  solutions:
    - Use single quotes around each search term, inside the double-quoted expression
    - Use parentheses for operator precedence
    - 'Wildcard: append * to a word stem for prefix matching (e.g. phosph* matches phosphorylation)'
  example_fix: |
    # Wrong:
    ?label bif:contains "kinase AND protein"
    # Correct:
    ?label bif:contains "'kinase' AND 'protein'"
    # With wildcards and NOT:
    ?label bif:contains "('phosph*' AND NOT 'kinase')"