MCP Server Proto-OKN

mcp-proto-okn
scripts

build_registry.py

build_registry.py•11.2 KiB

#!/usr/bin/env python3
"""
Build the graph registry (config/registry.json) from existing metadata sources.

Sources:
- metadata/descriptions/{kg_name}.txt → description_summary
- metadata/entities/{kg_name}_entities.csv → entity_types
- config/mcp.json → canonical list of graph names and endpoint URLs
- Hardcoded domain tag mapping and identifier namespace mapping
"""

import csv
import json
import os
import sys

# Project root
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Domain tag mapping (from README domain table and analysis)
DOMAIN_TAGS = {
    "biobricks-aopwiki": ["toxicology", "biology", "health"],
    "biobricks-ice": ["toxicology", "chemistry", "biology"],
    "biobricks-mesh": ["biology", "health", "vocabulary"],
    "biobricks-pubchem-annotations": ["chemistry", "toxicology", "pharmacology"],
    "biobricks-tox21": ["toxicology", "chemistry"],
    "biobricks-toxcast": ["toxicology", "chemistry"],
    "biohealth": ["biology", "health", "social_determinants"],
    "climatemodelskg": ["climate", "environment", "geospatial"],
    "dreamkg": ["social_services", "homelessness"],
    "fiokg": ["environment", "regulatory", "industry"],
    "gene-expression-atlas-okn": ["genomics", "biology", "health"],
    "geoconnex": ["hydrology", "geospatial", "environment"],
    "hydrologykg": ["hydrology", "environment", "water_quality"],
    "nasa-gesdisc-kg": ["climate", "earth_science", "geospatial"],
    "nde": ["infectious_disease", "health", "data_discovery"],
    "nikg": ["public_safety", "urban_planning", "geospatial"],
    "ruralkg": ["rural_health", "health", "criminal_justice"],
    "sawgraph": ["food_safety", "water_quality", "PFAS", "environment"],
    "scales": ["criminal_justice", "legal"],
    "securechainkg": ["software_security", "supply_chain"],
    "sockg": ["agriculture", "soil_science", "climate"],
    "spatialkg": ["geospatial", "administrative_boundaries"],
    "spoke-genelab": ["genomics", "space_biology", "biology"],
    "spoke-okn": ["biology", "health", "chemistry", "environment", "geospatial"],
    "sudokn": ["manufacturing", "supply_chain"],
    "ufokn": ["urban_flooding", "infrastructure", "emergency_response"],
    "wildlifekn": ["wildlife", "biodiversity", "conservation"],
}

# Identifier namespace mapping
IDENTIFIER_NAMESPACES = {
    "biobricks-aopwiki": ["CAS", "ChEBI", "ChEMBL", "PubChem", "InChIKey"],
    "biobricks-ice": ["DTXSID", "NCBI_Gene", "InChIKey", "CAS"],
    "biobricks-mesh": ["MeSH"],
    "biobricks-pubchem-annotations": ["PubChem", "InChI", "InChIKey", "SMILES"],
    "biobricks-tox21": ["CAS"],
    "biobricks-toxcast": ["DTXSID", "InChIKey", "CAS"],
    "biohealth": ["MONDO", "MeSH", "UMLS"],
    "climatemodelskg": ["GeoNames"],
    "dreamkg": [],
    "fiokg": ["NAICS", "S2Cell", "FIPS"],
    "gene-expression-atlas-okn": ["NCBI_Gene", "Ensembl", "GeneSymbol", "UBERON", "CL", "GO"],
    "geoconnex": ["Geoconnex"],
    "hydrologykg": ["NHDPlus_COMID", "FIPS", "S2Cell"],
    "nasa-gesdisc-kg": [],
    "nde": ["PubMed"],
    "nikg": ["FIPS"],
    "ruralkg": ["FIPS", "RUCC"],
    "sawgraph": ["CAS"],
    "scales": [],
    "securechainkg": ["CVE", "CPE"],
    "sockg": [],
    "spatialkg": ["S2Cell", "FIPS"],
    "spoke-genelab": ["NCBI_Gene", "GeneSymbol", "UBERON", "CL"],
    "spoke-okn": ["Ensembl", "MONDO", "ChEBI", "InChIKey", "FIPS"],
    "sudokn": ["NAICS"],
    "ufokn": ["S2Cell"],
    "wildlifekn": [],
}

# Example queries per graph
EXAMPLE_QUERIES = {
    "spoke-okn": [
        "What drugs treat rheumatoid arthritis?",
        "What genes are associated with Crohn's disease?",
        "What is the prevalence of diabetes in California counties?",
        "What compounds are found in water supplies in Texas?",
    ],
    "spoke-genelab": [
        "What genes are differentially expressed in spaceflight experiments?",
        "What are the mouse orthologs of human disease genes?",
        "What methylation changes occur in spaceflight studies?",
    ],
    "gene-expression-atlas-okn": [
        "What genes are differentially expressed in breast cancer?",
        "What tissues show high expression of BRCA1?",
    ],
    "biobricks-tox21": [
        "What chemicals have been tested in Tox21 assays?",
    ],
    "biobricks-ice": [
        "What bioassays are available for a specific chemical?",
        "What is the toxicity profile of bisphenol A?",
    ],
    "biobricks-toxcast": [
        "What high-throughput screening results exist for PFAS chemicals?",
    ],
    "biobricks-aopwiki": [
        "What adverse outcome pathways involve estrogen receptor activation?",
    ],
    "sawgraph": [
        "Where have PFAS been detected in drinking water?",
        "What food products contain contaminants?",
    ],
    "ruralkg": [
        "What substance abuse treatment providers are in rural counties?",
        "What is the relationship between rurality and mental health services?",
    ],
    "scales": [
        "How many criminal cases were filed in federal court?",
    ],
    "dreamkg": [
        "What social services are available for homeless individuals?",
    ],
    "biohealth": [
        "What social determinants are associated with diabetes?",
    ],
    "nikg": [
        "What incidents occurred in specific neighborhoods?",
    ],
    "geoconnex": [
        "What monitoring sites exist in a watershed?",
    ],
    "hydrologykg": [
        "What is the hydrological connectivity between surface water features?",
    ],
    "climatemodelskg": [
        "What climate models cover a specific region?",
    ],
    "securechainkg": [
        "What vulnerabilities affect a Python package?",
    ],
    "sockg": [
        "What soil carbon measurements exist for different tillage practices?",
    ],
    "sudokn": [
        "What manufacturers have specific process capabilities?",
    ],
    "spatialkg": [
        "What administrative regions contain a specific location?",
    ],
    "fiokg": [
        "What regulated facilities exist in a county?",
    ],
}

# Aliases: map mcp.json names to entity file names
ALIASES = {
    "spoke": "spoke-okn",
    "gene-expression-altlas-okn": "gene-expression-atlas-okn",  # typo in mcp.json
}

# Filename suffix that identifies an entity CSV in metadata/entities/.
_ENTITY_SUFFIX = "_entities.csv"

# Substring an endpoint URL must contain for its graph to be kept in the registry.
_FRINK_HOST = "frink.apps.renci.org"


def _endpoint_from_args(args):
    """Return the argument following the first usable ``--endpoint`` flag.

    A trailing ``--endpoint`` with no value after it is ignored. Returns
    ``None`` when no flag/value pair is present.
    """
    for i, arg in enumerate(args):
        if arg == "--endpoint" and i + 1 < len(args):
            return args[i + 1]
    return None


def load_mcp_config():
    """Load graph names and endpoints from mcp.json.

    Reads ``config/mcp.json`` under ROOT and inspects each entry of its
    ``"servers"`` mapping. Servers without a non-empty ``--endpoint`` value
    in their ``"args"`` list are skipped.

    Returns:
        dict mapping canonical graph name (after ALIASES resolution) to
        ``{"endpoint_url": ..., "mcp_name": ...}``. When several servers
        resolve to the same canonical name, the first one encountered wins.

    Raises:
        OSError: if the config file cannot be opened.
        json.JSONDecodeError: if the config file is not valid JSON.
    """
    config_path = os.path.join(ROOT, "config", "mcp.json")
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(config_path, encoding="utf-8") as fh:
        config = json.load(fh)

    graphs = {}
    for name, server in config.get("servers", {}).items():
        endpoint_url = _endpoint_from_args(server.get("args", []))
        if not endpoint_url:
            continue
        # Resolve canonical name
        canonical = ALIASES.get(name, name)
        if canonical not in graphs:
            graphs[canonical] = {
                "endpoint_url": endpoint_url,
                "mcp_name": name,
            }
    return graphs


def load_description(kg_name):
    """Load description from metadata/descriptions/{kg_name}.txt.

    Returns the file content with surrounding whitespace stripped, or an
    empty string when the file does not exist.
    """
    desc_path = os.path.join(ROOT, "metadata", "descriptions", f"{kg_name}.txt")
    if not os.path.exists(desc_path):
        return ""
    with open(desc_path, encoding="utf-8") as fh:
        return fh.read().strip()


def load_entities(kg_name):
    """Load entity types from metadata/entities/{kg_name}_entities.csv.

    The CSV is read by header name; only the ``Type`` and ``Label`` columns
    are used. Rows with an empty or missing label are ignored.

    Returns:
        dict with keys ``"classes"`` and ``"predicates"`` (labels,
        de-duplicated, in first-seen order) and ``"has_edge_properties"``
        (True if any labelled row has Type ``EdgeProperty``). When the file
        does not exist, both lists are empty and the flag is False.
    """
    entity_path = os.path.join(ROOT, "metadata", "entities", f"{kg_name}{_ENTITY_SUFFIX}")
    if not os.path.exists(entity_path):
        return {"classes": [], "predicates": [], "has_edge_properties": False}

    classes = []
    predicates = []
    has_edge_properties = False

    # newline="" is what the csv module requires so that newlines embedded
    # in quoted fields are parsed correctly.
    with open(entity_path, encoding="utf-8", newline="") as fh:
        for row in csv.DictReader(fh):
            # DictReader fills fields missing from a short row with None, so
            # a .get() default alone does not protect the .strip() call.
            entity_type = (row.get("Type") or "").strip()
            label = (row.get("Label") or "").strip()
            if not label:
                continue
            if entity_type == "Class":
                if label not in classes:
                    classes.append(label)
            elif entity_type == "Predicate":
                if label not in predicates:
                    predicates.append(label)
            elif entity_type == "EdgeProperty":
                has_edge_properties = True

    return {
        "classes": classes,
        "predicates": predicates,
        "has_edge_properties": has_edge_properties,
    }


def _entity_file_names():
    """Return the set of graph names that have a file in metadata/entities/."""
    entity_dir = os.path.join(ROOT, "metadata", "entities")
    if not os.path.exists(entity_dir):
        return set()
    # Slice off only the trailing suffix; str.replace would also remove the
    # marker text if it happened to occur earlier in the filename.
    return {
        filename[: -len(_ENTITY_SUFFIX)]
        for filename in os.listdir(entity_dir)
        if filename.endswith(_ENTITY_SUFFIX)
    }


def _aliases_for(kg_name, mcp_name):
    """Return alternate names for *kg_name*: ALIASES keys plus its mcp.json name."""
    aliases = [
        alias
        for alias, canonical in ALIASES.items()
        if canonical == kg_name and alias != kg_name
    ]
    # Also add the mcp_name if different
    if mcp_name and mcp_name != kg_name and mcp_name not in aliases:
        aliases.append(mcp_name)
    return aliases


def build_registry():
    """Build the complete registry.

    Graph names are the union of canonical names from mcp.json and names
    derived from entity CSV filenames. Graphs whose configured endpoint URL
    does not contain the FRINK host are dropped. Graphs known only from an
    entity file get a default FRINK endpoint URL derived from their name.

    Returns:
        list of registry entry dicts, sorted by graph name. The ``"aliases"``
        key is present only on entries that have at least one alias.
    """
    mcp_graphs = load_mcp_config()

    # Collect all canonical names from entity files + mcp config
    all_names = set(mcp_graphs) | _entity_file_names()

    # Remove non-FRINK endpoints
    non_frink = set()
    for name in all_names:
        url = mcp_graphs.get(name, {}).get("endpoint_url", "")
        if url and _FRINK_HOST not in url:
            non_frink.add(name)
    all_names -= non_frink

    registry = []
    for kg_name in sorted(all_names):
        info = mcp_graphs.get(kg_name, {})
        entry = {
            "name": kg_name,
            "display_name": kg_name.replace("-", " ").title(),
            "named_graph_uri": f"https://purl.org/okn/frink/kg/{kg_name}",
            "endpoint_url": info.get(
                "endpoint_url", f"https://{_FRINK_HOST}/{kg_name}/sparql"
            ),
            "domain_tags": DOMAIN_TAGS.get(kg_name, []),
            "description_summary": load_description(kg_name),
            "entity_types": load_entities(kg_name),
            "identifier_namespaces": IDENTIFIER_NAMESPACES.get(kg_name, []),
            "example_queries": EXAMPLE_QUERIES.get(kg_name, []),
        }
        aliases = _aliases_for(kg_name, info.get("mcp_name"))
        if aliases:
            entry["aliases"] = aliases
        registry.append(entry)

    return registry


def main():
    """Build the registry and write it to config/registry.json under ROOT."""
    registry = build_registry()
    output_path = os.path.join(ROOT, "config", "registry.json")
    with open(output_path, "w", encoding="utf-8") as fh:
        json.dump(registry, fh, indent=2)
    print(f"Built registry with {len(registry)} graphs → {output_path}")


if __name__ == "__main__":
    main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sbl-sdsc/mcp-proto-okn'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

build_registry.py•11.2 KiB