{
"title": "All mitochondrial Complex I inhibitors (AI-augmented PubMed mining)",
"description": "List of small molecules and drugs claimed to inhibit mitochondrial respiratory Complex I (NADH:ubiquinone oxidoreductase). The compounds were aggregated by automated literature mining of PubMed abstracts using generative AI classification. Each entry includes the compound name, the number of PubMed references supporting inhibition, a known/new status flag, a confidence category derived from the reference count, and the list of PubMed IDs supporting the assignment.",
"creator": "Daniel Nicorici",
"institution": "University of Helsinki, Klefström Lab",
"date_created": "2025-10-13",
"file_name": "all_mito_complex_I_inhibitors.txt",
"delimiter": "TAB",
"encoding": "UTF-8",
"license": "CC BY 4.0 — attribution required for reuse or redistribution",
"fields": {
"compound": "Canonical compound or small-molecule name as extracted from PubMed abstracts (names are treated case-insensitively).",
"pubmed_references": "Integer — number of PubMed abstracts where the compound was identified as inhibiting mitochondrial Complex I (unique PMID count).",
"known_status": "Categorical flag: 'known' for compounds established in the literature; 'new' for compounds newly suggested by the AI-based mining pipeline.",
"confidence": "Categorical — confidence based on 'pubmed_references' bins: low (≤1), low-medium (2), medium (3–5), high (≥6).",
"pubmed_ids": "String — unique supporting PMIDs, sorted ascending and separated by ';'."
},
"data_origin": {
"sources": [
"PubMed baseline and update XMLs (NCBI)",
"AI classification using GPT-4.1-mini (YES/probablyYES → inhibitor)",
"Manual curation of high-confidence outputs",
"Known inhibitors list (data/reference/mitochondrial_complex_i_inhibitors.txt)"
],
"extraction_method": "Generative AI screening of PubMed abstracts that mention small-molecule compounds and Complex I inhibition, followed by aggregation of unique compound names together with the PMIDs that support each one (provenance)."
},
"intended_use": [
"Reference set for validation of mitochondrial Complex I inhibitor discovery pipelines.",
"Training/benchmark data for natural product–mitochondrial function studies.",
"Cross-linking with COCONUT, ChEMBL, or Aurora-MCP compound databases."
],
"notes": [
"File is TAB-delimited with a single header row.",
"Numeric column 'pubmed_references' may be capped at 100 for highly cited compounds (e.g., known-reference boost).",
"Entries labeled 'new' have not been manually confirmed in the literature and represent AI-generated predictions.",
"'pubmed_ids' contains unique PMIDs, sorted and ';' separated within the cell.",
"'confidence' is derived from 'pubmed_references' using fixed bins (low ≤1; low-medium 2; medium 3–5; high ≥6).",
"Intended to be used alongside the Aurora-MCP ETL and MCP server environment."
]
}