opengenes_get_schema_info
Retrieve database schema details to understand data structure and relationships for querying aging and longevity research datasets.
Instructions
Get information about the database schema
Input Schema
TableJSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
No arguments | |||
Implementation Reference
- src/opengenes_mcp/server.py:168-169 (registration)Registers the 'opengenes_get_schema_info' tool using FastMCP decorator, binding it to the get_schema_info method. Prefix 'opengenes_' is used from class init.self.tool(name=f"{self.prefix}get_schema_info", description="Get information about the database schema")(self.get_schema_info) self.tool(name=f"{self.prefix}example_queries", description="Get a list of example SQL queries")(self.get_example_queries)
- src/opengenes_mcp/server.py:290-471 (handler)The primary handler function that dynamically queries the database schema using PRAGMA table_info, constructs comprehensive schema information including table/column details, custom descriptions, critical query guidelines for multi-value fields and lifespan metrics, cross-table analysis patterns, and returns a structured dictionary with all schema data.def get_schema_info(self) -> Dict[str, Any]: """ Get comprehensive information about the OpenGenes database schema including table structures, column descriptions, enumerations, and critical query guidelines. Returns: Dict containing detailed table schemas, column information, query guidelines, and available enumerations """ with start_action(action_type="get_schema_info") as action: # Get table information tables_query = "SELECT name FROM sqlite_master WHERE type='table'" tables_result = self.db_manager.execute_query(tables_query) table_names = [row['name'] for row in tables_result.rows] action.add_success_fields(tables_found=len(table_names), table_names=table_names) schema_info = { "database_overview": { "description": "OpenGenes database contains aging and lifespan research data with 4 main tables linked by HGNC gene symbols", "total_tables": len(table_names), "primary_key": "HGNC (gene symbol) - links all tables together" }, "critical_query_guidelines": { "multi_value_fields": { "description": "Some columns contain comma-separated values. ALWAYS use LIKE queries with wildcards for these fields.", "fields": [ "gene_hallmarks.'hallmarks of aging' - contains multiple aging hallmarks per gene", "lifespan_change.intervention_deteriorates - multiple biological processes that deteriorate", "lifespan_change.intervention_improves - multiple biological processes that improve" ], "example_syntax": "WHERE \"hallmarks of aging\" LIKE '%stem cell exhaustion%'" }, "lifespan_metrics": { "description": "Database contains both mean and maximum lifespan change metrics. When user asks about lifespan effects without specifying, show both.", "mean_vs_max": "lifespan_percent_change_mean shows average effect, lifespan_percent_change_max shows maximum observed effect", "when_to_show_both": "If user asks about 'lifespan effects' or 'lifespan changes' without specifying mean vs max, include both metrics", "ordering_both": "Use COALESCE(lifespan_percent_change_mean, lifespan_percent_change_max) for ordering when showing both", "significance": "Both mean and max have corresponding significance columns (significance_mean, significance_max)" }, "result_ordering": { "lifespan_extension": "ORDER BY lifespan_percent_change_mean DESC (highest increase first)", "lifespan_reduction": "ORDER BY lifespan_percent_change_mean ASC (largest decrease first)", "importance": "Always order lifespan results by magnitude of effect for relevance", "both_metrics": "When showing both mean and max, use COALESCE for ordering or show comparison" }, "comprehensive_aging_evidence": { "description": "For questions about aging evidence, link to aging, or longevity associations for a gene, query ALL 4 tables for complete evidence", "required_tables": "1) gene_criteria (aging-related criteria), 2) gene_hallmarks (aging pathways), 3) lifespan_change (experimental effects), 4) longevity_associations (human population studies)", "example_patterns": "Evidence of X and aging, Link between X and aging, X gene aging associations, What evidence links X to aging", "critical_note": "Do NOT omit longevity_associations table - it contains crucial human population genetics data" }, "gene_queries": "Use HGNC column for gene symbols (TP53, FOXO3, etc.)", "safety": "Only SELECT queries allowed - no INSERT, UPDATE, DELETE, or DDL operations" }, "tables": {}, "enumerations": {}, "biological_processes_tags": [ "cardiovascular system", "nervous system", "immune function", "muscle, bone, skin, liver", "renal function, reproductive function", "cognitive function, eyesight, hair/coat", "body composition", "glucose metabolism, lipid metabolism, cholesterol metabolism", "insulin sensitivity", "oxidation/antioxidant function, mitochondrial function", "DNA metabolism, carcinogenesis, apoptosis", "senescence, inflammation, stress responce", "autophagy, proliferation, locomotor function", "tissue regeneration, stem and progenitor cells", "blood, proteostasis, angiogenesis, metabolism", "endocrine system, intercellular matrix", "building and protection of telomeres", "cytoskeleton organization, nucleus structure", "skin and the intestine epithelial barriers function", "calcium homeostasis, proteolysis" ], "aging_hallmarks_tags": [ "nuclear DNA instability", "telomere attrition", "alterations in histone modifications", "chromatin remodeling", "transcriptional alterations", "alterations in DNA methylation", "degradation of proteolytic systems", "TOR pathway dysregulation", "INS/IGF-1 pathway dysregulation", "AMPK pathway dysregulation", "SIRT pathway dysregulation", "impairment of the mitochondrial integrity and biogenesis", "mitochondrial DNA instability", "accumulation of reactive oxygen species", "senescent cells accumulation", "stem cell exhaustion", "sterile inflammation", "intercellular communication impairment", "changes in the extracellular matrix structure", "impairment of proteins folding and stability", "nuclear architecture impairment", "disabled macroautophagy" ] } # Get detailed column information for each table with descriptions table_descriptions = { "lifespan_change": { "purpose": "Experimental data on how gene modifications affect lifespan in various model organisms", "key_columns": "HGNC, model_organism, effect_on_lifespan, intervention methods, lifespan measurements", "use_cases": "Questions about gene effects on lifespan, experimental conditions, organism studies", "special_notes": "Contains multi-value fields for intervention effects. Use LIKE queries for intervention_deteriorates and intervention_improves columns." }, "gene_criteria": { "purpose": "Aging-related criteria that genes meet (12 different categories)", "key_columns": "HGNC, criteria", "use_cases": "Questions about why genes are considered aging-related", "special_notes": "Links genes to specific aging research criteria classifications" }, "gene_hallmarks": { "purpose": "Links genes to hallmarks of aging", "key_columns": "HGNC, hallmarks of aging (multi-value field)", "use_cases": "Questions about which aging hallmarks genes are involved in", "special_notes": "CRITICAL: 'hallmarks of aging' column contains comma-separated values. Always use LIKE queries with wildcards." }, "longevity_associations": { "purpose": "Population genetics data on gene variants associated with longevity", "key_columns": "HGNC, polymorphism data, ethnicity, study type", "use_cases": "Questions about genetic variants associated with longevity in human populations", "special_notes": "Contains SNPs, indels, and other genetic variations from population studies" }, "comprehensive_aging_evidence": { "purpose": "IMPORTANT: When users ask about 'evidence of aging', 'link to aging/longevity', or 'aging associations' for a gene, query ALL 4 tables for complete evidence", "recommended_approach": "For comprehensive aging evidence, combine data from: 1) gene_criteria (why gene is aging-related), 2) gene_hallmarks (aging pathways involved), 3) lifespan_change (experimental effects), 4) longevity_associations (human population studies)", "example_question_patterns": "What evidence links X to aging?, Evidence of X and aging, X gene and longevity associations, Link between X and aging", "critical_note": "Do not just query experimental tables (lifespan_change) - include population genetics data (longevity_associations) for complete evidence" } } for table_name in table_names: pragma_query = f"PRAGMA table_info({table_name})" columns_result = self.db_manager.execute_query(pragma_query) # Add detailed column descriptions for lifespan_change table column_descriptions = {} if table_name == "lifespan_change": column_descriptions = { "HGNC": "Gene symbol (standard gene names like TP53, FOXO3)", "model_organism": "Organism used for experiment (mouse, C. elegans, fly, etc.)", "sex": "Sex of organism used (male, female, all, hermaphrodites, etc.)", "effect_on_lifespan": "Direction of lifespan change (increases/decreases/no change)", "lifespan_percent_change_mean": "Mean percent change in lifespan (average effect across cohort - use for ordering results)", "lifespan_percent_change_max": "Maximum percent change in lifespan (best individual response - show both mean and max when user asks about lifespan effects)", "lifespan_percent_change_median": "Median percent change in lifespan", "intervention_deteriorates": "MULTI-VALUE: Biological processes that deteriorated (use LIKE queries)", "intervention_improves": "MULTI-VALUE: Biological processes that improved (use LIKE queries)", "intervention_method": "Method used to modify gene (knockout, overexpression, etc.)", "main_effect_on_lifespan": "Type of gene activity change (gain/loss/switch of function)", "significance_mean": "Statistical significance of mean lifespan change (1=significant, 0=not significant)", "significance_max": "Statistical significance of maximum lifespan change (1=significant, 0=not significant)", "control_lifespan_mean": "Mean lifespan of control group", "experiment_lifespan_mean": "Mean lifespan of experimental group" } elif table_name == "gene_hallmarks": column_descriptions = { "HGNC": "Gene symbol (standard gene names like TP53, FOXO3)", "hallmarks of aging": "MULTI-VALUE: Comma-separated aging hallmarks (ALWAYS use LIKE queries with wildcards)" } elif table_name == "gene_criteria": column_descriptions = { "HGNC": "Gene symbol (standard gene names like TP53, FOXO3)", "criteria": "Aging-related criteria the gene meets (12 different categories)" } elif table_name == "longevity_associations": column_descriptions = { "HGNC": "Gene symbol (standard gene names like TP53, FOXO3)", "polymorphism type": "Type of genetic variant (SNP, In/Del, VNTR, etc.)", "polymorphism id": "Identifier for the genetic variant (e.g., rs numbers for SNPs)", "nucleotide substitution": "DNA sequence change for the variant", "amino acid substitution": "Protein sequence change caused by the variant", "polymorphism — other": "Additional polymorphism details", "ethnicity": "Ethnicity of study participants", "study type": "Type of population study (GWAS, candidate genes, meta-analysis, etc.)", "sex": "Sex of study participants", "doi": "DOI of the research publication", "pmid": "PubMed ID of the research publication" } schema_info["tables"][table_name] = { "description": table_descriptions.get(table_name, {}), "columns": [ { "name": col["name"], "type": col["type"], "nullable": not col["notnull"], "primary_key": bool(col["pk"]), "description": column_descriptions.get(col["name"], "") } for col in columns_result.rows ] } # Add comprehensive enumerations schema_info["enumerations"] = self._get_known_enumerations() action.add_success_fields(schema_retrieved=True, total_tables=len(table_names)) return schema_info
- src/opengenes_mcp/server.py:96-141 (helper)Helper class used by get_schema_info to execute the schema queries (sqlite_master for tables, PRAGMA table_info for columns) in read-only mode.class DatabaseManager: """Manages SQLite database connections and queries.""" def __init__(self, db_path: Path): self.db_path = db_path if not self.db_path.exists(): raise FileNotFoundError(f"Database not found at {self.db_path}") def execute_query(self, sql: str, params: Optional[tuple] = None) -> QueryResult: """Execute a read-only SQL query and return results.""" with start_action(action_type="execute_query", sql=sql, params=params) as action: # Execute query using read-only connection - SQLite will enforce read-only at database level # Using URI format with mode=ro for true read-only access readonly_uri = f"file:{self.db_path}?mode=ro" try: with sqlite3.connect(readonly_uri, uri=True) as conn: conn.row_factory = sqlite3.Row # This allows dict-like access to rows cursor = conn.cursor() if params: cursor.execute(sql, params) else: cursor.execute(sql) rows = cursor.fetchall() # Convert sqlite3.Row objects to dictionaries rows_dicts = [dict(row) for row in rows] result = QueryResult( rows=rows_dicts, count=len(rows_dicts), query=sql ) action.add_success_fields(rows_count=len(rows_dicts)) return result except sqlite3.OperationalError as e: if "readonly database" in str(e).lower(): error_msg = f"Write operation attempted on read-only database. Rejected query: {sql}" action.log(message_type="query_rejected", error=error_msg, rejected_query=sql) raise ValueError(error_msg) from e else: # Re-raise other operational errors raise
- src/opengenes_mcp/server.py:145-165 (registration)Class initialization where prefix='opengenes_' is set and _register_opengenes_tools() is called to register the tool.def __init__( self, name: str = "OpenGenes MCP Server", db_path: Path = DB_PATH, prefix: str = "opengenes_", huge_query_tool: bool = False, **kwargs ): """Initialize the OpenGenes tools with a database manager and FastMCP functionality.""" # Initialize FastMCP with the provided name and any additional kwargs super().__init__(name=name, **kwargs) # Initialize our database manager self.db_manager = DatabaseManager(db_path) self.prefix = prefix self.huge_query_tool = huge_query_tool # Register our tools and resources self._register_opengenes_tools() self._register_opengenes_resources()
- src/opengenes_mcp/server.py:90-95 (schema)Pydantic schema for query results, used indirectly as get_schema_info calls execute_query which returns QueryResult.class QueryResult(BaseModel): """Result from a database query.""" rows: List[Dict[str, Any]] = Field(description="Query result rows") count: int = Field(description="Number of rows returned") query: str = Field(description="The SQL query that was executed")