# Identity, access point, and provenance of the MediaDive dataset description.
schema_info:
  title: 'MediaDive - Microbial Culture Media Database'
  # Folded scalar: the wrapped lines below are joined with single spaces into
  # one line of text.
  description: >
    Comprehensive culture media database from DSMZ with 3,289 standardized
    recipes for bacteria, archaea, fungi, yeast, microalgae, and phages. Includes
    1,489 ingredients with chemical cross-references (GMO 41%, CAS 39%, ChEBI
    32%), 45,685 strain records (73% with BacDive links), and growth conditions
    (pH, temperature, oxygen). Hierarchical recipe structure: medium → solution →
    solution_recipe → ingredient with detailed preparation protocols.
  endpoint: 'https://rdfportal.org/primary/sparql'
  base_uri: 'https://purl.dsmz.de/mediadive/'
  # Named graph(s) holding the dataset; the example queries select it with FROM.
  graphs:
  - 'http://rdfportal.org/dataset/mediadive'
  version:
    # Quoted so the values stay strings instead of being typed as float/date.
    mie_version: '1.0'
    mie_created: '2024-12-08'
    data_version: 'Current'
    update_frequency: 'Regular'
  license:
    data_license: 'CC BY 4.0'
    # NOTE(review): this is a site home page, not a licence text - confirm
    # whether a link to the CC BY 4.0 deed was intended.
    license_url: 'https://www.dsmz.de'
  access:
    rate_limiting: 'Reasonable use'
    max_query_timeout: '60 seconds'
    backend: 'Virtuoso'
# ShEx shapes for the main classes: medium, ingredient, medium composition,
# growth condition, and strain. A trailing "?" marks an optional property.
# Every prefix used inside the shapes (schema, rdfs, xsd) is declared so the
# schema is self-contained.
shape_expressions: |
  PREFIX schema: <https://purl.dsmz.de/schema/>
  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
  PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
  PREFIX dct: <http://purl.org/dc/terms/>

  <MediumShape> {
    a [ schema:CultureMedium ] ;
    rdfs:label xsd:string ;
    schema:belongsToGroup xsd:string ;
    schema:hasFinalPH xsd:string ? ;
    schema:hasMinPH xsd:float ? ;
    schema:hasMaxPH xsd:float ? ;
    schema:isComplex xsd:boolean
  }

  <IngredientShape> {
    a [ schema:Ingredient ] ;
    rdfs:label xsd:string ;
    schema:hasFormula xsd:string ? ;
    schema:hasCAS xsd:string ? ;
    schema:hasChEBI xsd:integer ? ;
    schema:hasPubChem xsd:integer ? ;
    schema:hasKEGG xsd:string ? ;
    schema:hasGMO IRI ?
  }

  <MediumCompositionShape> {
    a [ schema:MediumComposition ] ;
    schema:partOfMedium @<MediumShape> ;
    schema:containsIngredient @<IngredientShape> ;
    schema:gramsPerLiter xsd:float ? ;
    schema:isOptionalIngredient xsd:boolean
  }

  <GrowthShape> {
    a [ schema:GrowthCondition ] ;
    schema:partOfMedium @<MediumShape> ;
    schema:relatedToStrain @<StrainShape> ;
    schema:growthTemperature xsd:integer ;
    schema:growthPH xsd:float ? ;
    schema:hasOxygenRequirement xsd:string ?
  }

  <StrainShape> {
    a [ schema:Strain ] ;
    schema:hasDSMNumber xsd:integer ;
    schema:hasBacDiveID xsd:integer ? ;
    schema:hasSpecies xsd:string ;
    schema:belongsTaxGroup xsd:string
  }
sample_rdf_entries:
# Sample: one culture medium with its label, group, pH range text, and
# complexity flag.
- title: "Culture Medium"
  description: "Complete medium with pH and classification."
  # Prefixes are declared and the subject is written as a full IRI (built on
  # schema_info.base_uri) so the snippet is valid Turtle on its own; a "/" may
  # not appear unescaped in a Turtle prefixed name.
  rdf: |
    @prefix schema: <https://purl.dsmz.de/schema/> .
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

    <https://purl.dsmz.de/mediadive/medium/1118> a schema:CultureMedium ;
      rdfs:label "MD1-MEDIUM" ;
      schema:belongsToGroup "MD1-MEDIUM" ;
      schema:hasFinalPH "7.0-7.2" ;
      schema:isComplex true .
# Sample: one ingredient carrying formula, CAS, ChEBI, PubChem, and KEGG
# identifiers.
- title: "Ingredient with Cross-References"
  description: "Chemical ingredient with multiple database identifiers."
  # Prefixes are declared and the subject is written as a full IRI (built on
  # schema_info.base_uri) so the snippet is valid Turtle on its own; a "/" may
  # not appear unescaped in a Turtle prefixed name.
  rdf: |
    @prefix schema: <https://purl.dsmz.de/schema/> .
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

    <https://purl.dsmz.de/mediadive/ingredient/234> a schema:Ingredient ;
      rdfs:label "Glucose" ;
      schema:hasFormula "C6H12O6" ;
      schema:hasCAS "50-99-7" ;
      schema:hasChEBI 17234 ;
      schema:hasPubChem 5793 ;
      schema:hasKEGG "C00031" .
# Sample: one growth-condition record tying a strain to a medium with
# temperature, pH, and oxygen requirement.
- title: "Growth Conditions"
  description: "Strain cultivation parameters."
  # The prefix is declared and resources are written as full IRIs (built on
  # schema_info.base_uri) so the snippet is valid Turtle on its own; a "/" may
  # not appear unescaped in a Turtle prefixed name.
  rdf: |
    @prefix schema: <https://purl.dsmz.de/schema/> .

    <https://purl.dsmz.de/mediadive/growth/5678> a schema:GrowthCondition ;
      schema:partOfMedium <https://purl.dsmz.de/mediadive/medium/1118> ;
      schema:relatedToStrain <https://purl.dsmz.de/mediadive/strain/12345> ;
      schema:growthTemperature 37 ;
      schema:growthPH 7.2 ;
      schema:hasOxygenRequirement "aerobic" .
# Sample: one solution-recipe line giving the amount of an ingredient in a
# solution.
- title: "Solution Recipe"
  description: "Ingredient amounts in solution."
  # The prefix is declared and resources are written as full IRIs (built on
  # schema_info.base_uri) so the snippet is valid Turtle on its own; a "/" may
  # not appear unescaped in a Turtle prefixed name.
  rdf: |
    @prefix schema: <https://purl.dsmz.de/schema/> .

    <https://purl.dsmz.de/mediadive/solution_recipe/890> a schema:SolutionRecipe ;
      schema:partOfSolution <https://purl.dsmz.de/mediadive/solution/sol123> ;
      schema:includesIngredient <https://purl.dsmz.de/mediadive/ingredient/234> ;
      schema:ingredientAmount 10.0 ;
      schema:ingredientUnit "g" ;
      schema:gramsPerLiter 10.0 .
# Sample: one strain with DSM number, BacDive ID, species name, and
# taxonomic group.
- title: "Microbial Strain"
  description: "Strain with taxonomic data and BacDive link."
  # The prefix is declared and the subject is written as a full IRI (built on
  # schema_info.base_uri) so the snippet is valid Turtle on its own; a "/" may
  # not appear unescaped in a Turtle prefixed name.
  rdf: |
    @prefix schema: <https://purl.dsmz.de/schema/> .

    <https://purl.dsmz.de/mediadive/strain/12345> a schema:Strain ;
      schema:hasDSMNumber 12345 ;
      schema:hasBacDiveID 167744 ;
      schema:hasSpecies "Desulfotomaculum antarcticum" ;
      schema:belongsTaxGroup "Bacteria" .
sparql_query_examples:
# Basic example: full-text keyword match on medium labels, returning the
# medium, its label, and its group.
- title: "Search Media by Keyword"
  description: "Find culture media by label/group keywords using bif:contains."
  question: "Which media are for marine organisms?"
  complexity: "basic"
  # rdfs: is declared so every prefix the query relies on for vocabulary terms
  # is visible in the query text itself. The keyword is wrapped in single
  # quotes inside the double-quoted literal, as bif:contains expects.
  sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?medium ?label ?group
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?medium a schema:CultureMedium ;
              rdfs:label ?label ;
              schema:belongsToGroup ?group .
      ?label bif:contains "'marine'"
    }
    ORDER BY ?label
    LIMIT 30
# Basic example: look up label, complexity flag, and (when present) final pH
# and source link for one medium pinned with VALUES.
- title: "Retrieve Medium Properties"
  description: "Get functional properties and growth characteristics."
  question: "What are the properties of specific media?"
  complexity: "basic"
  # rdfs: is declared so the query text is self-contained. hasFinalPH and
  # hasLinkToSource are fetched with OPTIONAL so a medium lacking either is
  # still returned.
  sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?medium ?label ?ph ?isComplex ?docLink
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      VALUES ?medium { <https://purl.dsmz.de/mediadive/medium/1118> }
      ?medium a schema:CultureMedium ;
              rdfs:label ?label ;
              schema:isComplex ?isComplex .
      OPTIONAL { ?medium schema:hasFinalPH ?ph }
      OPTIONAL { ?medium schema:hasLinkToSource ?docLink }
    }
# Intermediate example: numeric filter on the min/max pH properties; only
# media that carry both values can match.
- title: "Search by pH Range"
  description: "Find media within specific pH ranges."
  question: "Which media have pH between 6.5 and 7.5?"
  complexity: "intermediate"
  # rdfs: is declared so the query text is self-contained.
  sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?medium ?label ?minPH ?maxPH
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?medium a schema:CultureMedium ;
              rdfs:label ?label ;
              schema:hasMinPH ?minPH ;
              schema:hasMaxPH ?maxPH .
      FILTER(?minPH >= 6.5 && ?maxPH <= 7.5)
    }
    ORDER BY ?minPH
    LIMIT 20
# Intermediate example: growth-condition records with a temperature above 45,
# hottest first; pH and oxygen requirement are returned when recorded.
- title: 'Growth by Temperature'
  description: 'Find thermophilic organism conditions.'
  question: 'What growth conditions exist above 45°C?'
  complexity: 'intermediate'
  # Medium, strain, and temperature are required; the two OPTIONAL patterns
  # keep rows that have no pH or oxygen requirement.
  sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    SELECT ?medium ?strain ?temp ?ph ?oxygen
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?growth a schema:GrowthCondition ;
              schema:partOfMedium ?medium ;
              schema:relatedToStrain ?strain ;
              schema:growthTemperature ?temp .
      OPTIONAL { ?growth schema:growthPH ?ph }
      OPTIONAL { ?growth schema:hasOxygenRequirement ?oxygen }
      FILTER(?temp > 45)
    }
    ORDER BY DESC(?temp)
    LIMIT 20
# Intermediate example: every ingredient of one medium, with its
# grams-per-liter concentration where one is recorded, highest first.
- title: "Medium Composition"
  description: "Get complete ingredient list with concentrations."
  question: "What are the ingredients in a specific medium?"
  complexity: "intermediate"
  # rdfs: is declared so the query text is self-contained.
  # gramsPerLiter is optional in MediumCompositionShape, so it is matched with
  # OPTIONAL; requiring it would drop compositions that have no concentration
  # and the ingredient list would no longer be complete.
  sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?medium ?mediumLabel ?ingredient ?ingredientLabel ?gPerL
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?composition a schema:MediumComposition ;
                   schema:partOfMedium ?medium ;
                   schema:containsIngredient ?ingredient .
      ?medium rdfs:label ?mediumLabel .
      ?ingredient rdfs:label ?ingredientLabel .
      OPTIONAL { ?composition schema:gramsPerLiter ?gPerL }
      FILTER(?medium = <https://purl.dsmz.de/mediadive/medium/1118>)
    }
    ORDER BY DESC(?gPerL)
# Advanced example: ingredients that have both a ChEBI and a KEGG identifier,
# with PubChem and CAS added when available.
- title: "Ingredient Cross-References"
  description: "Link ingredients to chemical databases."
  question: "Which ingredients have ChEBI and KEGG?"
  complexity: "advanced"
  # rdfs: is declared so the query text is self-contained.
  # The question requires BOTH ChEBI and KEGG, so those two are plain required
  # patterns; this returns the same rows as OPTIONAL followed by
  # FILTER(BOUND(?chebi) && BOUND(?kegg)) without the extra left joins.
  # PubChem and CAS remain OPTIONAL because they are only supplementary.
  sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?ingredient ?label ?chebi ?kegg ?pubchem ?cas
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?ingredient a schema:Ingredient ;
                  rdfs:label ?label ;
                  schema:hasChEBI ?chebi ;
                  schema:hasKEGG ?kegg .
      OPTIONAL { ?ingredient schema:hasPubChem ?pubchem }
      OPTIONAL { ?ingredient schema:hasCAS ?cas }
    }
    ORDER BY ?label
    LIMIT 30
# Advanced example: joins growth-condition records flagged with a true growth
# indicator to the strain's species name and the medium's label.
- title: "Strain-Medium Compatibility"
  description: "Map strains to compatible media with growth indicators."
  question: "Which media support specific bacterial strains?"
  complexity: "advanced"
  # rdfs: is declared so the query text is self-contained.
  sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?strain ?species ?medium ?mediumLabel ?temp
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?growth a schema:GrowthCondition ;
              schema:relatedToStrain ?strain ;
              schema:partOfMedium ?medium ;
              schema:growthTemperature ?temp ;
              schema:hasGrowthIndicator true .
      ?strain schema:hasSpecies ?species .
      ?medium rdfs:label ?mediumLabel .
    }
    ORDER BY ?species ?medium
    LIMIT 40
cross_references:
# Cross-reference pattern: per-database identifier properties on ingredients,
# grouped by kind of database with the coverage of each.
- pattern: "schema:has[Database] properties"
  description: "Ingredients link to chemical databases via dedicated properties."
  databases:
    chemical_identifiers: ["GMO (41%)", "CAS (39%)"]
    metabolic: ["ChEBI (32%)", "PubChem (18%)", "KEGG (13%)", "MetaCyc (7%)"]
    regional: ["ZVG (15%)"]
  # The schema: prefix is declared so the query can be run as written. Each
  # identifier is OPTIONAL; the FILTER keeps ingredients that have at least a
  # CAS number or a ChEBI ID.
  sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    SELECT ?ingredient ?cas ?chebi ?kegg ?gmo
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?ingredient a schema:Ingredient .
      OPTIONAL { ?ingredient schema:hasCAS ?cas }
      OPTIONAL { ?ingredient schema:hasChEBI ?chebi }
      OPTIONAL { ?ingredient schema:hasKEGG ?kegg }
      OPTIONAL { ?ingredient schema:hasGMO ?gmo }
      FILTER(BOUND(?cas) || BOUND(?chebi))
    } LIMIT 50
# Cross-reference pattern: strains carrying a BacDive identifier.
- pattern: "schema:hasBacDiveID"
  description: "Strains link to BacDive for phenotypic data."
  databases:
    microbial: ["BacDive (73% of 45,685 strains)"]
  # The schema: prefix is declared so the query can be run as written.
  # hasBacDiveID is a required pattern, so only linked strains are returned.
  sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    SELECT ?strain ?dsmNumber ?bacDiveID ?species
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?strain a schema:Strain ;
              schema:hasDSMNumber ?dsmNumber ;
              schema:hasBacDiveID ?bacDiveID ;
              schema:hasSpecies ?species .
    } LIMIT 30
# Cross-reference pattern: media carrying a link to their source document.
- pattern: "schema:hasLinkToSource"
  description: "Media link to DSMZ PDF documentation."
  databases:
    documentation: ["DSMZ PDFs (99% of 3,289 media)"]
  # The schema: and rdfs: prefixes are declared so the query can be run as
  # written.
  sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?medium ?label ?pdfLink
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?medium a schema:CultureMedium ;
              rdfs:label ?label ;
              schema:hasLinkToSource ?pdfLink .
    } LIMIT 30
# Short guidance notes for query authors, grouped by topic.
architectural_notes:
  # How the entity types relate to one another.
  schema_design:
  - 'Medium as central hub: compositions, solutions, growth conditions, steps'
  - 'Hierarchical recipe: medium → solution → solution_recipe → ingredient'
  - 'Strain-medium via growth conditions for cultivation compatibility'
  # Query-writing hints.
  performance:
  - 'Use bif:contains for label/group/species keyword searches'
  - 'pH/temperature numeric filtering efficient'
  - 'Composition queries can be large - filter by specific medium'
  - 'Use OPTIONAL for partial cross-reference coverage'
  # External resources reachable from the data, with coverage.
  data_integration:
  - 'GMO highest coverage (41%) for metabolic context'
  - 'CAS Registry (39%) for chemical identification'
  - 'BacDive (73%) for microbial phenotypes'
  - 'DSMZ PDFs (99%) for authoritative protocols'
  # Caveats about how values are represented.
  data_quality:
  - 'pH: string ranges (hasFinalPH) + numeric min/max'
  - 'Cross-reference coverage varies by database'
  - 'Use isOptionalIngredient flag for flexibility'
# Headline counts, coverage percentages, and averages for the dataset.
data_statistics:
  # Entity totals (integers).
  total_media: 3289
  total_ingredients: 1489
  total_strains: 45685
  # Share of records carrying each cross-reference; kept as quoted strings.
  coverage:
    ingredients_gmo: '41%'
    ingredients_cas: '39%'
    ingredients_chebi: '32%'
    strains_bacdive: '73%'
    media_pdf_links: '99%'
  # Average number of child records per parent.
  cardinality:
    avg_compositions_per_medium: 21.9
    avg_solution_recipes_per_solution: 7.2
  # NOTE(review): nested under data_statistics on the assumption that it
  # belongs to this section - confirm against the intended layout.
  performance_characteristics:
  - 'Temperature/pH numeric filtering efficient'
  - 'Use bif:contains for keyword searches'
  - 'Composition queries need medium filters'
  - 'LIMIT 30-50 recommended'
anti_patterns:
# Anti-pattern: substring FILTER versus the full-text predicate. Both queries
# declare their prefixes and select the dataset graph, so the search predicate
# is the only difference between them.
- title: "FILTER Instead of bif:contains"
  problem: "Using FILTER CONTAINS for keyword searches is inefficient."
  wrong_sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?medium ?label
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?medium a schema:CultureMedium ;
              rdfs:label ?label .
      FILTER(CONTAINS(LCASE(?label), "marine"))
    }
  correct_sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?medium ?label
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?medium a schema:CultureMedium ;
              rdfs:label ?label .
      ?label bif:contains "'marine'"
    }
  explanation: "Use bif:contains for full-text search - faster and supports boolean operators."
# Anti-pattern: requiring every cross-reference versus matching each with
# OPTIONAL. Both queries declare the schema: prefix and select the dataset
# graph, so the pattern style is the only difference between them.
- title: "Missing OPTIONAL for Partial Coverage"
  problem: "Requiring all cross-references filters out most ingredients."
  wrong_sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    SELECT ?ingredient ?chebi ?kegg ?pubchem
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?ingredient a schema:Ingredient ;
                  schema:hasChEBI ?chebi ;
                  schema:hasKEGG ?kegg ;
                  schema:hasPubChem ?pubchem .
    }
  correct_sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    SELECT ?ingredient ?chebi ?kegg ?pubchem
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?ingredient a schema:Ingredient .
      OPTIONAL { ?ingredient schema:hasChEBI ?chebi }
      OPTIONAL { ?ingredient schema:hasKEGG ?kegg }
      OPTIONAL { ?ingredient schema:hasPubChem ?pubchem }
      FILTER(BOUND(?chebi) || BOUND(?kegg))
    }
  explanation: "Use OPTIONAL for properties with partial coverage, then filter for at least one."
# Anti-pattern: scanning every composition versus restricting to one medium
# and capping the result. Both queries declare the schema: prefix and select
# the dataset graph, so the medium filter and LIMIT are the only differences.
- title: "Unbounded Composition Queries"
  problem: "Composition queries without medium filter return too many rows."
  wrong_sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    SELECT ?composition ?medium ?ingredient ?gPerL
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?composition schema:partOfMedium ?medium ;
                   schema:containsIngredient ?ingredient ;
                   schema:gramsPerLiter ?gPerL .
    }
  correct_sparql: |
    PREFIX schema: <https://purl.dsmz.de/schema/>
    SELECT ?composition ?medium ?ingredient ?gPerL
    FROM <http://rdfportal.org/dataset/mediadive>
    WHERE {
      ?composition schema:partOfMedium ?medium ;
                   schema:containsIngredient ?ingredient ;
                   schema:gramsPerLiter ?gPerL .
      FILTER(?medium = <https://purl.dsmz.de/mediadive/medium/1118>)
    }
    LIMIT 50
  explanation: "Filter by specific medium or use LIMIT to avoid timeouts."
# Frequent failure modes, each with likely causes and remedies.
common_errors:
- error: 'Keyword search failures'
  causes:
  - 'Using FILTER CONTAINS instead of bif:contains'
  - 'Not wrapping keywords in single quotes'
  solutions:
  - 'Use bif:contains for full-text search'
  # Double-quoted because the text itself contains single quotes; \" yields a
  # literal double quote.
  - "Format: ?label bif:contains \"'keyword'\""
  - "Boolean: ?label bif:contains \"'keyword1' OR 'keyword2'\""
- error: 'Empty cross-reference results'
  causes:
  - 'Requiring all databases (AND logic)'
  - 'Not using OPTIONAL for partial coverage'
  solutions:
  - 'Use OPTIONAL for each database property'
  - 'Filter with OR: FILTER(BOUND(?chebi) || BOUND(?kegg))'
  - 'Coverage: GMO 41%, CAS 39%, ChEBI 32%'
- error: 'Query timeout on compositions'
  causes:
  - 'No medium filter with 72K+ compositions'
  - 'Missing LIMIT clause'
  solutions:
  - 'Filter by specific medium first'
  - 'Use LIMIT 50-100'
  - 'Avg 21.9 compositions per medium'