Skip to main content
Glama
test_bigquery.py10.6 kB
"""Tests for m4.core.backends.bigquery module. Tests cover: - BigQueryBackend initialization - Project ID resolution - Query execution (mocked) - Table operations (mocked) - Error handling """ import os from unittest.mock import MagicMock, patch import pytest from m4.core.backends.base import ConnectionError from m4.core.backends.bigquery import BigQueryBackend from m4.core.datasets import DatasetDefinition, Modality @pytest.fixture def test_dataset(): """Create a test dataset definition with BigQuery config.""" return DatasetDefinition( name="test-bq-dataset", modalities={Modality.TABULAR}, bigquery_project_id="test-project", bigquery_dataset_ids=["test_dataset_1", "test_dataset_2"], ) @pytest.fixture def mock_bigquery(): """Mock the BigQuery client and module.""" with patch("m4.core.backends.bigquery.BigQueryBackend._get_client") as mock: mock_client = MagicMock() mock.return_value = (mock_client, "test-project") yield mock_client class TestBigQueryBackendInit: """Test BigQueryBackend initialization.""" def test_default_init(self): """Test default initialization.""" backend = BigQueryBackend() assert backend.name == "bigquery" assert backend._project_id_override is None def test_init_with_project_override(self): """Test initialization with project ID override.""" backend = BigQueryBackend(project_id_override="custom-project") assert backend._project_id_override == "custom-project" class TestBigQueryProjectResolution: """Test project ID resolution.""" def test_override_takes_priority(self, test_dataset): """Test that project override takes highest priority.""" backend = BigQueryBackend(project_id_override="override-project") project_id = backend._get_project_id(test_dataset) assert project_id == "override-project" def test_env_var_takes_second_priority(self, test_dataset): """Test that M4_PROJECT_ID env var takes second priority.""" with patch.dict(os.environ, {"M4_PROJECT_ID": "env-project"}): backend = BigQueryBackend() # No override project_id = 
backend._get_project_id(test_dataset) assert project_id == "env-project" def test_dataset_config_used_as_fallback(self, test_dataset): """Test that dataset config is used when no override.""" # Clear env var if set env_backup = os.environ.pop("M4_PROJECT_ID", None) try: backend = BigQueryBackend() project_id = backend._get_project_id(test_dataset) assert project_id == "test-project" finally: if env_backup: os.environ["M4_PROJECT_ID"] = env_backup def test_default_project_when_no_config(self): """Test default project when dataset has no config.""" dataset = DatasetDefinition( name="no-bq-dataset", bigquery_project_id=None, bigquery_dataset_ids=[], ) env_backup = os.environ.pop("M4_PROJECT_ID", None) try: backend = BigQueryBackend() project_id = backend._get_project_id(dataset) assert project_id == "physionet-data" # Default finally: if env_backup: os.environ["M4_PROJECT_ID"] = env_backup class TestBigQueryClientCaching: """Test BigQuery client caching.""" def test_client_cached(self, test_dataset): """Test that client is cached for same project.""" with patch.dict("sys.modules", {"google.cloud.bigquery": MagicMock()}): with patch( "m4.core.backends.bigquery.BigQueryBackend._get_project_id" ) as mock_get_project: mock_get_project.return_value = "test-project" backend = BigQueryBackend() # First call creates client mock_bq = MagicMock() mock_client = MagicMock() mock_bq.Client.return_value = mock_client with patch.dict("sys.modules", {"google.cloud": MagicMock()}): with patch.dict("sys.modules", {"google.cloud.bigquery": mock_bq}): # Manually set up cache to simulate behavior backend._client_cache = { "client": mock_client, "project_id": "test-project", } # Second call should use cache client, _project = backend._get_client(test_dataset) assert client == mock_client # Client should not be created again mock_bq.Client.assert_not_called() class TestBigQueryQueryExecution: """Test query execution with mocked BigQuery.""" def test_successful_query(self, test_dataset, 
mock_bigquery): """Test executing a successful query.""" import pandas as pd # Set up mock to return a DataFrame mock_df = pd.DataFrame({"id": [1, 2, 3], "value": ["a", "b", "c"]}) mock_query_job = MagicMock() mock_query_job.to_dataframe.return_value = mock_df mock_bigquery.query.return_value = mock_query_job with patch.dict("sys.modules", {"google.cloud": MagicMock()}): mock_bq = MagicMock() with patch.dict("sys.modules", {"google.cloud.bigquery": mock_bq}): backend = BigQueryBackend() backend._client_cache = { "client": mock_bigquery, "project_id": "test-project", } result = backend.execute_query("SELECT * FROM test", test_dataset) assert result.success is True assert result.row_count == 3 assert "id" in result.data def test_empty_result(self, test_dataset, mock_bigquery): """Test query returning empty results.""" import pandas as pd # Set up mock to return empty DataFrame mock_df = pd.DataFrame() mock_query_job = MagicMock() mock_query_job.to_dataframe.return_value = mock_df mock_bigquery.query.return_value = mock_query_job with patch.dict("sys.modules", {"google.cloud": MagicMock()}): mock_bq = MagicMock() with patch.dict("sys.modules", {"google.cloud.bigquery": mock_bq}): backend = BigQueryBackend() backend._client_cache = { "client": mock_bigquery, "project_id": "test-project", } result = backend.execute_query("SELECT * FROM empty", test_dataset) assert result.success is True assert result.data == "No results found" assert result.row_count == 0 class TestBigQueryTableOperations: """Test table listing and info operations.""" def test_get_table_list_empty_config(self): """Test table list when no BigQuery datasets configured.""" dataset = DatasetDefinition( name="no-bq", bigquery_project_id=None, bigquery_dataset_ids=[], ) backend = BigQueryBackend() tables = backend.get_table_list(dataset) assert tables == [] def test_get_table_info_qualified_name(self, test_dataset, mock_bigquery): """Test getting table info with fully qualified name.""" import pandas as pd # 
Mock column info result mock_df = pd.DataFrame( { "column_name": ["id", "name"], "data_type": ["INT64", "STRING"], "is_nullable": ["NO", "YES"], } ) mock_query_job = MagicMock() mock_query_job.to_dataframe.return_value = mock_df mock_bigquery.query.return_value = mock_query_job with patch.dict("sys.modules", {"google.cloud": MagicMock()}): mock_bq = MagicMock() with patch.dict("sys.modules", {"google.cloud.bigquery": mock_bq}): backend = BigQueryBackend() backend._client_cache = { "client": mock_bigquery, "project_id": "test-project", } result = backend.get_table_info( "`test-project.test_dataset.patients`", test_dataset ) assert result.success is True assert "column_name" in result.data or "id" in result.data def test_get_table_info_invalid_qualified_name(self, test_dataset): """Test error handling for invalid qualified name.""" backend = BigQueryBackend() result = backend.get_table_info("invalid.name", test_dataset) assert result.success is False assert "Invalid" in result.error class TestBigQueryBackendInfo: """Test backend info generation.""" def test_backend_info(self, test_dataset): """Test getting backend info.""" backend = BigQueryBackend() info = backend.get_backend_info(test_dataset) assert "BigQuery" in info assert test_dataset.name in info assert "test-project" in info assert "test_dataset_1" in info def test_backend_info_no_datasets(self): """Test backend info when no datasets configured.""" dataset = DatasetDefinition( name="empty-bq", bigquery_project_id="test-project", bigquery_dataset_ids=[], ) backend = BigQueryBackend() info = backend.get_backend_info(dataset) assert "BigQuery" in info assert "none configured" in info class TestBigQueryConnectionError: """Test connection error handling.""" def test_missing_bigquery_package(self, test_dataset): """Test error when _get_client raises ConnectionError.""" backend = BigQueryBackend() # Clear cache to force new client creation backend._client_cache = {"client": None, "project_id": None} # Mock 
_get_client to raise ConnectionError with patch.object( backend, "_get_client", side_effect=ConnectionError( "BigQuery dependencies not found", backend="bigquery" ), ): with pytest.raises(ConnectionError) as exc_info: backend.execute_query("SELECT 1", test_dataset) assert "dependencies" in str(exc_info.value).lower()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/hannesill/m4'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.