Skip to main content
Glama
test_extract_gene_aa_change.py (5.84 kB)
"""Tests for _extract_gene_aa_change method in external.py.""" import pytest from biomcp.variants.external import ExternalVariantAggregator class TestExtractGeneAAChange: """Test the _extract_gene_aa_change method.""" @pytest.fixture def aggregator(self): """Create an ExternalVariantAggregator instance.""" return ExternalVariantAggregator() def test_extract_from_docm(self, aggregator): """Test extraction from DOCM data.""" variant_data = {"docm": {"gene": "BRAF", "aa_change": "p.V600E"}} result = aggregator._extract_gene_aa_change(variant_data) assert result == "BRAF V600E" def test_extract_from_hgvsp_long_format(self, aggregator): """Test extraction from hgvsp with long amino acid names.""" variant_data = { "cadd": {"gene": {"genename": "TP53"}}, "hgvsp": ["p.Arg175His"], } result = aggregator._extract_gene_aa_change(variant_data) # The code doesn't convert all long forms, just checks for Val/Ala assert result == "TP53 Arg175His" def test_extract_from_hgvsp_with_dbnsfp(self, aggregator): """Test extraction from hgvsp with dbnsfp gene name.""" variant_data = { "dbnsfp": {"genename": "EGFR"}, "hgvsp": ["p.Leu858Arg"], } result = aggregator._extract_gene_aa_change(variant_data) # The code doesn't convert Leu/Arg to L/R assert result == "EGFR Leu858Arg" def test_extract_from_cadd_data(self, aggregator): """Test extraction from CADD annotations.""" variant_data = { "cadd": { "gene": {"genename": "KRAS", "prot": {"protpos": 12}}, "oaa": "G", "naa": "D", } } result = aggregator._extract_gene_aa_change(variant_data) assert result == "KRAS G12D" def test_extract_from_docm_without_p_prefix(self, aggregator): """Test extraction from DOCM without p. 
prefix.""" variant_data = {"docm": {"gene": "PIK3CA", "aa_change": "E545K"}} result = aggregator._extract_gene_aa_change(variant_data) assert result == "PIK3CA E545K" def test_extract_with_multiple_hgvsp(self, aggregator): """Test handling of multiple hgvsp entries - should take first.""" variant_data = { "cadd": {"gene": {"genename": "BRCA1"}}, "hgvsp": ["p.Gln1756Ter", "p.Gln1756*"], } result = aggregator._extract_gene_aa_change(variant_data) # Takes the first one, doesn't convert Gln/Ter assert result == "BRCA1 Gln1756Ter" def test_extract_with_special_characters(self, aggregator): """Test extraction with special characters in protein change.""" variant_data = { "cadd": {"gene": {"genename": "MLH1"}}, "hgvsp": ["p.Lys618Alafs*9"], } result = aggregator._extract_gene_aa_change(variant_data) # Should extract the basic AA change pattern assert result is not None assert "MLH1" in result def test_extract_no_gene_name(self, aggregator): """Test when gene name is missing.""" variant_data = {"hgvsp": ["p.Val600Glu"]} result = aggregator._extract_gene_aa_change(variant_data) assert result is None def test_extract_no_aa_change(self, aggregator): """Test when AA change is missing.""" variant_data = {"cadd": {"gene": {"genename": "BRAF"}}} result = aggregator._extract_gene_aa_change(variant_data) assert result is None def test_extract_empty_variant_data(self, aggregator): """Test with empty variant data.""" result = aggregator._extract_gene_aa_change({}) assert result is None def test_extract_malformed_hgvsp(self, aggregator): """Test with malformed HGVS protein notation.""" variant_data = { "clinvar": { "gene": {"symbol": "MYC"}, "hgvs": {"protein": ["invalid_format"]}, } } result = aggregator._extract_gene_aa_change(variant_data) assert result is None def test_extract_priority_order(self, aggregator): """Test that DOCM is prioritized for AA change, CADD for gene name.""" variant_data = { "docm": {"gene": "BRAF", "aa_change": "p.V600E"}, "hgvsp": ["p.Val600Lys"], # 
Different change "cadd": { "gene": {"genename": "WRONG", "prot": {"protpos": 600}}, "oaa": "V", "naa": "K", }, } result = aggregator._extract_gene_aa_change(variant_data) # CADD is prioritized for gene name, DOCM for AA change assert result == "WRONG V600E" def test_extract_regex_with_val_ala(self, aggregator): """Test regex extraction when Val/Ala are present.""" # The code specifically looks for Val or Ala to trigger regex variant_data = { "cadd": {"gene": {"genename": "TEST1"}}, "hgvsp": ["p.Val600Ala"], } result = aggregator._extract_gene_aa_change(variant_data) # The regex doesn't find a match in "Val600Ala" because it's looking for [A-Z]\d+[A-Z] # which would match "V600A" but not "Val600Ala" assert result == "TEST1 Val600Ala" def test_extract_handles_exceptions_gracefully(self, aggregator): """Test that exceptions are handled gracefully.""" # This should trigger an exception internally but return None variant_data = { "cadd": {"gene": {"genename": "GENE"}}, "hgvsp": None, # This will cause issues } result = aggregator._extract_gene_aa_change(variant_data) assert result is None

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/genomoncology/biomcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.