Skip to main content
Glama
clinvar.yaml (16 kB)
# ClinVar RDF dataset description: schema shapes, sample triples, example
# SPARQL queries, cross-references, and known query pitfalls.
# NOTE(review): this document was restored from a whitespace-collapsed copy.
# Key nesting and the line breaks inside block scalars are reconstructed;
# confirm them against the upstream file.

# Dataset identity, endpoint, versioning, licence and access limits.
schema_info:
  title: ClinVar RDF
  description: |
    ClinVar aggregates genomic variation and its relationship to human health
    with 3.5M+ variant records, clinical interpretations, gene associations,
    and disease conditions. Main entities include VariationArchiveType
    (genetic variations), Gene (associated genes), ClinAsserTraitType
    (disease/phenotype), and ClassifiedRecord (clinical assertions).
    Cross-referenced to MedGen, OMIM, MeSH, and HGNC.
  endpoint: https://rdfportal.org/ncbi/sparql
  base_uri: http://ncbi.nlm.nih.gov/clinvar/
  graphs:
    - http://rdfportal.org/dataset/clinvar
  version:
    mie_version: "1.1"
    mie_created: "2024-12-08"
    data_version: "Release 2025.01"
    update_frequency: "Monthly"
  license:
    data_license: "Public Domain (CC0)"
    license_url: "https://www.ncbi.nlm.nih.gov/clinvar/docs/maintenance_use/"
  access:
    rate_limiting: "Standard SPARQL endpoint limits"
    max_query_timeout: "60 seconds"
    backend: "Virtuoso (supports bif:contains)"

# ShEx shapes for the four main entity types. Every prefix used in the shapes
# is declared here, because ShEx has no built-in prefixes.
shape_expressions: |
  PREFIX cvo: <http://purl.jp/bio/10/clinvar/>
  PREFIX med2rdf: <http://med2rdf.org/ontology/med2rdf#>
  PREFIX sio: <http://semanticscience.org/resource/>
  PREFIX faldo: <http://biohackathon.org/resource/faldo#>
  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
  PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
  PREFIX dct: <http://purl.org/dc/terms/>

  <VariationArchiveShape> {
    a [ cvo:VariationArchiveType ] ;
    rdfs:label xsd:string ;
    cvo:accession xsd:string ;
    cvo:variation_id xsd:integer ;
    cvo:variation_name xsd:string ;
    cvo:variation_type xsd:string ;
    cvo:species xsd:string ;
    cvo:record_status xsd:string ;
    cvo:date_created xsd:date ;
    cvo:date_last_updated xsd:date ;
    cvo:number_of_submitters xsd:integer ;
    cvo:classified_record BNode ? ;
    med2rdf:disease BNode *
  }

  <GeneShape> {
    a [ cvo:Gene med2rdf:Gene ] ;
    cvo:gene_id xsd:integer ? ;
    cvo:symbol xsd:string * ;
    cvo:full_name xsd:string * ;
    cvo:hgnc_id xsd:string ? ;
    cvo:omim xsd:integer ? ;
    cvo:cytogenetic_location xsd:string * ;
    faldo:location BNode *
  }

  <ClinAsserTraitShape> {
    a [ cvo:ClinAsserTraitType med2rdf:Disease ] ;
    cvo:type xsd:string ;
    cvo:id xsd:integer ? ;
    dct:references IRI +
  }

  <ClassifiedRecordShape> {
    a [ cvo:ClassifiedRecord sio:SIO_001122 ] ;
    cvo:classifications BNode ;
    sio:SIO_000628 IRI *
  }

# Illustrative Turtle fragments, one per entity type. Prefixes are the ones
# declared in shape_expressions.
sample_rdf_entries:
  - title: "Pathogenic BRCA1 Variant"
    description: "Frameshift duplication in BRCA1 associated with hereditary breast and ovarian cancer."
    rdf: |
      <http://ncbi.nlm.nih.gov/clinvar/variation/856461> a cvo:VariationArchiveType ;
        rdfs:label "NM_007294.4(BRCA1):c.2244dup (p.Asp749fs)" ;
        cvo:accession "VCV000856461" ;
        cvo:variation_id 856461 ;
        cvo:variation_type "Duplication" ;
        cvo:species "Homo sapiens" ;
        cvo:record_status "current" ;
        cvo:date_created "2022-05-16"^^xsd:date ;
        cvo:number_of_submitters 1 ;
        med2rdf:disease _:disease1 .

  - title: "Gene with Genomic Location"
    description: "SLCO1B1 gene on chromosome 12p12.1 with HGNC and OMIM identifiers."
    rdf: |
      <http://ncbi.nlm.nih.gov/gene/10599> a cvo:Gene, med2rdf:Gene ;
        cvo:gene_id 10599 ;
        cvo:symbol "SLCO1B1" ;
        cvo:full_name "solute carrier organic anion transporter family member 1B1" ;
        cvo:hgnc_id "HGNC:10959" ;
        cvo:omim 604843 ;
        cvo:cytogenetic_location "12p12.1" ;
        faldo:location _:loc1 .

  - title: "Single Nucleotide Variant"
    description: "Missense variant in SLC9A4 with clinical significance classification."
    rdf: |
      <http://ncbi.nlm.nih.gov/clinvar/variation/3798403> a cvo:VariationArchiveType ;
        rdfs:label "NM_001011552.4(SLC9A4):c.1724C>A (p.Ala575Asp)" ;
        cvo:accession "VCV003798403" ;
        cvo:variation_type "single nucleotide variant" ;
        cvo:species "Homo sapiens" ;
        cvo:record_status "current" ;
        cvo:classified_record _:classrec1 .

  - title: "Disease Association"
    description: "Clinical trait representing disease condition with MedGen cross-reference."
    rdf: |
      _:disease1 a cvo:ClinAsserTraitType, med2rdf:Disease ;
        cvo:type "Disease" ;
        cvo:id 16789 ;
        dct:references <http://ncbi.nlm.nih.gov/medgen/C3150901> .

  - title: "Classified Record"
    description: "Clinical classification with germline significance assessment."
    rdf: |
      _:classrec1 a cvo:ClassifiedRecord, sio:SIO_001122 ;
        cvo:classifications _:classi1 ;
        sio:SIO_000628 <http://ncbi.nlm.nih.gov/gene/10599> .
      _:classi1 cvo:germline_classification _:germ1 .
      _:germ1 cvo:description "Uncertain significance" .

# Example queries ordered by complexity. rdfs: and bif: are left undeclared
# and rely on the endpoint resolving them.
sparql_query_examples:
  - title: "Search Variants by Gene with bif:contains"
    description: "Find all variants for BRCA1 gene using full-text search."
    question: "What variants are recorded for BRCA1?"
    complexity: "basic"
    sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>

      SELECT ?variant ?label ?type ?status
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?variant a cvo:VariationArchiveType ;
          rdfs:label ?label ;
          cvo:variation_type ?type ;
          cvo:record_status ?status .
        ?label bif:contains "'BRCA1'" .
      }
      LIMIT 100

  - title: "Get Variant by Accession"
    description: "Retrieve variant details using ClinVar accession number."
    question: "What are details of variant VCV000856461?"
    complexity: "basic"
    sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>

      SELECT *
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?variant cvo:accession "VCV000856461" ;
          ?property ?value .
      }

  - title: "Find Variants with Clinical Significance"
    description: "Retrieve variants with their clinical significance classifications."
    question: "Which variants have clinical significance classifications?"
    complexity: "intermediate"
    sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>

      SELECT ?variant ?label ?significance
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?variant a cvo:VariationArchiveType ;
          rdfs:label ?label ;
          cvo:classified_record ?classrec .
        ?classrec cvo:classifications/cvo:germline_classification/cvo:description ?significance .
      }
      LIMIT 100

  - title: "Count Variants by Type"
    description: "Group and count variants by their variation types."
    question: "What types of variations exist and how many?"
    complexity: "intermediate"
    sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>

      SELECT ?variation_type (COUNT(?variant) as ?count)
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?variant a cvo:VariationArchiveType ;
          cvo:variation_type ?variation_type ;
          cvo:record_status "current" .
      }
      GROUP BY ?variation_type
      ORDER BY DESC(?count)
      LIMIT 50

  # NOTE(review): cvo:name is not listed in ClinAsserTraitShape; confirm that
  # the cvo:name/rdfs:label path exists in the data.
  - title: "Gene-Disease Associations via Variants"
    description: "Find gene-disease associations through variant annotations using keyword search."
    question: "What diseases are associated with TP53 variants?"
    complexity: "intermediate"
    sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>
      PREFIX med2rdf: <http://med2rdf.org/ontology/med2rdf#>
      PREFIX sio: <http://semanticscience.org/resource/>

      SELECT DISTINCT ?variant ?label ?gene ?disease_name
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?variant a cvo:VariationArchiveType ;
          rdfs:label ?label ;
          med2rdf:disease ?disease ;
          cvo:classified_record/sio:SIO_000628 ?gene .
        ?disease cvo:type "Disease" ;
          cvo:name/rdfs:label ?disease_name .
        ?label bif:contains "'TP53'" .
      }
      LIMIT 100

  - title: "Well-Studied Recent Variants"
    description: "Identify variants with multiple submitters and recent updates."
    question: "Which variants have multiple submissions and were recently updated?"
    complexity: "advanced"
    sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>
      PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

      SELECT ?variant ?label ?num_submitters ?last_updated ?type
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?variant a cvo:VariationArchiveType ;
          rdfs:label ?label ;
          cvo:number_of_submitters ?num_submitters ;
          cvo:date_last_updated ?last_updated ;
          cvo:variation_type ?type .
        FILTER(?num_submitters >= 3)
        FILTER(?last_updated >= "2024-01-01"^^xsd:date)
      }
      ORDER BY DESC(?num_submitters) DESC(?last_updated)
      LIMIT 100

  - title: "Genes with Chromosomal Locations"
    description: "Retrieve genes with cytogenetic locations and external identifiers."
    question: "What genes have chromosomal locations and HGNC identifiers?"
    complexity: "advanced"
    sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>

      SELECT DISTINCT ?gene ?symbol ?full_name ?cyto_loc ?hgnc ?omim
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?gene a cvo:Gene ;
          cvo:symbol ?symbol ;
          cvo:full_name ?full_name ;
          cvo:cytogenetic_location ?cyto_loc .
        OPTIONAL { ?gene cvo:hgnc_id ?hgnc }
        OPTIONAL { ?gene cvo:omim ?omim }
        FILTER(REGEX(?cyto_loc, "^[0-9]+[pq]"))
      }
      ORDER BY ?cyto_loc
      LIMIT 100

# Properties that link ClinVar entities to external resources. Each query
# declares the cvo:, med2rdf: and dct: prefixes it uses.
cross_references:
  - pattern: "rdfs:seeAlso"
    description: "Variants and genes link to ClinVar web interface and HGNC."
    databases:
      primary: ["ClinVar Web (variants)", "HGNC (genes)"]
    sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>

      SELECT ?variant ?accession ?url
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?variant a cvo:VariationArchiveType ;
          cvo:accession ?accession ;
          rdfs:seeAlso ?url .
      }
      LIMIT 10

  - pattern: "dct:references"
    description: "Disease/trait annotations reference MedGen, OMIM, and MeSH."
    databases:
      biomedical: ["MedGen (~95%)", "OMIM (~40%)", "MeSH (~30%)"]
    sparql: |
      PREFIX med2rdf: <http://med2rdf.org/ontology/med2rdf#>
      PREFIX dct: <http://purl.org/dc/terms/>

      SELECT DISTINCT ?disease ?ref
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?disease a med2rdf:Disease ;
          dct:references ?ref .
        FILTER(CONTAINS(STR(?ref), "medgen") || CONTAINS(STR(?ref), "omim"))
      }
      LIMIT 50

  - pattern: "cvo:hgnc_id"
    description: "Gene entities have HGNC identifiers for official gene symbols."
    databases:
      genomics: ["HGNC (~100% human genes)"]
    sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>

      SELECT ?gene ?symbol ?hgnc_id
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?gene a cvo:Gene ;
          cvo:symbol ?symbol ;
          cvo:hgnc_id ?hgnc_id .
      }
      LIMIT 20

  - pattern: "cvo:omim"
    description: "Direct OMIM identifiers on Gene entities linking to disease entries."
    databases:
      genetics: ["OMIM (~4,000 genes)"]
    sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>

      SELECT ?gene ?symbol ?omim_id
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?gene a cvo:Gene ;
          cvo:symbol ?symbol ;
          cvo:omim ?omim_id .
      }
      LIMIT 50

# Free-text design, performance, integration and data-quality notes.
architectural_notes:
  schema_design:
    - "VariationArchiveType central entity with VCV accessions"
    - "Classified records contain clinical assertions and interpretations"
    - "Disease associations via blank nodes linking to dct:references"
    - "Gene entities with calculated and submitted relationships"
    - "FALDO ontology for genomic coordinates"
  performance:
    - "Use bif:contains for gene symbol and variant name searches"
    - "Direct property filters (variation_type, record_status) efficient"
    - "Complex joins through blank nodes may be slow"
    - "Always use LIMIT for exploratory queries"
    - "Counting queries complete in ~1-3 seconds"
  data_integration:
    - "MedGen for standardized disease concepts"
    - "HGNC for gene symbol standardization"
    - "OMIM for Mendelian inheritance"
    - "MeSH for clinical concept mapping"
  data_quality:
    - "Record_status 'current' filters deprecated entries"
    - "Number of submitters indicates evidence strength"
    - "Some variants lack disease/clinical significance"
    - "Dates use both xsd:date and xsd:string"

# Size, coverage and timing figures as recorded by the authors.
data_statistics:
  total_variations: 3588969
  total_genes: 20000
  coverage:
    clinical_significance: "~90%"
    disease_associations: "~75%"
    gene_associations: "~85%"
    external_refs: "~95% MedGen"
  cardinality:
    avg_submitters_per_variant: 1.2
    avg_diseases_per_variant: 1.5
    avg_locations_per_gene: 15.3
  performance_characteristics:
    - "Simple property queries: <1s"
    - "bif:contains searches: 1-3s"
    - "Complex blank node joins: 3-10s"
    - "Aggregation queries: 5-15s"

# Query shapes to avoid, each paired with the recommended alternative.
anti_patterns:
  - title: "Missing Graph Specification"
    problem: "Queries without FROM clause may return incomplete results or timeout."
    wrong_sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>

      SELECT ?s ?p ?o
      WHERE {
        ?s a cvo:VariationArchiveType .
        ?s ?p ?o .
      }
      LIMIT 10
    correct_sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>

      SELECT ?s ?p ?o
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?s a cvo:VariationArchiveType .
        ?s ?p ?o .
      }
      LIMIT 10
    explanation: "Always specify FROM <http://rdfportal.org/dataset/clinvar>."

  - title: "FILTER CONTAINS vs bif:contains"
    problem: "FILTER CONTAINS is slower than bif:contains."
    wrong_sparql: |
      SELECT ?variant ?label
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?variant rdfs:label ?label .
        FILTER(CONTAINS(?label, "BRCA1"))
      }
    correct_sparql: |
      SELECT ?variant ?label
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?variant rdfs:label ?label .
        ?label bif:contains "'BRCA1'" .
      }
    explanation: "Use bif:contains with single-quoted keywords for better performance."

  - title: "Blank Node Chains Without OPTIONAL"
    problem: "Inner joins through blank nodes filter out variants lacking annotations."
    wrong_sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>

      SELECT ?var ?sig
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?var cvo:classified_record/cvo:classifications/cvo:germline_classification/cvo:description ?sig .
      }
    correct_sparql: |
      PREFIX cvo: <http://purl.jp/bio/10/clinvar/>

      SELECT ?var ?sig
      FROM <http://rdfportal.org/dataset/clinvar>
      WHERE {
        ?var a cvo:VariationArchiveType .
        OPTIONAL {
          ?var cvo:classified_record/cvo:classifications/cvo:germline_classification/cvo:description ?sig .
        }
      }
    explanation: "Use OPTIONAL for blank node chains to include variants with missing data."

# Frequently seen failures with their likely causes and remedies.
common_errors:
  - error: "Query timeout on aggregation"
    causes:
      - "Counting 3.5M+ variants without filters"
      - "Complex blank node property paths"
      - "Missing LIMIT clause"
    solutions:
      - "Add filters (variation_type, record_status, dates)"
      # NOTE(review): LIMIT is applied to result rows after aggregation, so it
      # may not shorten a COUNT; confirm this advice against the endpoint.
      - "Use LIMIT even on COUNT queries"
      - "Break complex queries into smaller steps"

  - error: "Empty results for cross-references"
    causes:
      - "Querying external URIs not in graph as subjects"
      - "Not following blank node chains to dct:references"
    solutions:
      - "Use disease/trait blank nodes to find dct:references"
      - "Check rdfs:seeAlso on main entities"
      - "Sample data first to understand URI patterns"

  - error: "Inconsistent date filtering"
    causes:
      - "Mixed xsd:date and xsd:string datatypes"
      - "Same property may have different datatypes"
    solutions:
      - "Use cvo:date_created (xsd:date) not dct:created (xsd:string)"
      - "Check datatype with sample queries first"
      - "Cast or convert when necessary"

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arkinjo/togo-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.