Skip to main content
Glama
test_duckdb.py16.1 kB
"""Tests for m4.core.backends.duckdb module. Tests cover: - DuckDBBackend initialization - Database path resolution - Query execution (mocked) - Table listing - Error handling """ import os import tempfile from pathlib import Path from unittest.mock import patch import pytest from m4.core.backends.base import ConnectionError, TableNotFoundError from m4.core.backends.duckdb import DuckDBBackend from m4.core.datasets import DatasetDefinition, Modality @pytest.fixture def test_dataset(): """Create a test dataset definition.""" return DatasetDefinition( name="test-dataset", modalities={Modality.TABULAR}, default_duckdb_filename="test_dataset.duckdb", ) @pytest.fixture def temp_db(test_dataset): """Create a temporary DuckDB database for testing.""" import duckdb with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "test.duckdb" # Create a minimal test database conn = duckdb.connect(str(db_path)) conn.execute( """ CREATE TABLE patients ( subject_id INTEGER PRIMARY KEY, gender VARCHAR, anchor_age INTEGER ) """ ) conn.execute( """ INSERT INTO patients VALUES (1, 'M', 65), (2, 'F', 42), (3, 'M', 55) """ ) conn.close() yield db_path class TestDuckDBBackendInit: """Test DuckDBBackend initialization.""" def test_default_init(self): """Test default initialization.""" backend = DuckDBBackend() assert backend.name == "duckdb" assert backend._db_path_override is None def test_init_with_path_override(self, temp_db): """Test initialization with path override.""" backend = DuckDBBackend(db_path_override=temp_db) assert backend._db_path_override == temp_db class TestDuckDBPathResolution: """Test database path resolution.""" def test_path_override_takes_priority(self, test_dataset, temp_db): """Test that path override takes highest priority.""" backend = DuckDBBackend(db_path_override=temp_db) path = backend._get_db_path(test_dataset) assert path == temp_db def test_env_var_takes_second_priority(self, test_dataset, temp_db): """Test that M4_DB_PATH env var takes second priority.""" with patch.dict(os.environ, {"M4_DB_PATH": str(temp_db)}): backend = DuckDBBackend() # No override path = backend._get_db_path(test_dataset) assert path == temp_db def test_dataset_config_used_as_fallback(self, test_dataset): """Test that dataset config is used when no override.""" with patch.dict(os.environ, {}, clear=True): # Remove M4_DB_PATH if set env_backup = os.environ.pop("M4_DB_PATH", None) try: backend = DuckDBBackend() with patch( "m4.core.backends.duckdb.get_default_database_path" ) as mock_get_path: mock_get_path.return_value = Path("/mock/path/test.duckdb") path = backend._get_db_path(test_dataset) assert path == Path("/mock/path/test.duckdb") mock_get_path.assert_called_once_with(test_dataset.name) finally: if env_backup: os.environ["M4_DB_PATH"] = env_backup class TestDuckDBQueryExecution: """Test query execution.""" def test_successful_query(self, test_dataset, temp_db): """Test executing a successful query.""" backend = DuckDBBackend(db_path_override=temp_db) result = backend.execute_query("SELECT * FROM patients", test_dataset) assert result.success is True assert result.error is None assert result.row_count == 3 assert "subject_id" in result.data assert "gender" in result.data def test_query_with_limit(self, test_dataset, temp_db): """Test query with LIMIT clause.""" backend = DuckDBBackend(db_path_override=temp_db) result = backend.execute_query("SELECT * FROM patients LIMIT 1", test_dataset) assert result.success is True assert result.row_count == 1 def test_empty_result(self, test_dataset, temp_db): """Test query returning no results.""" backend = DuckDBBackend(db_path_override=temp_db) result = backend.execute_query( "SELECT * FROM patients WHERE subject_id = 999", test_dataset ) assert result.success is True assert result.data == "No results found" assert result.row_count == 0 def test_table_not_found(self, test_dataset, temp_db): """Test query against non-existent table.""" backend = DuckDBBackend(db_path_override=temp_db) result = backend.execute_query("SELECT * FROM nonexistent_table", test_dataset) assert result.success is False assert result.error is not None def test_connection_error_missing_db(self, test_dataset): """Test connection error when database file doesn't exist.""" backend = DuckDBBackend(db_path_override=Path("/nonexistent/path/db.duckdb")) with pytest.raises(ConnectionError) as exc_info: backend.execute_query("SELECT 1", test_dataset) assert "not found" in str(exc_info.value).lower() assert exc_info.value.backend == "duckdb" class TestDuckDBTableOperations: """Test table listing and info operations.""" def test_get_table_list(self, test_dataset, temp_db): """Test listing tables.""" backend = DuckDBBackend(db_path_override=temp_db) tables = backend.get_table_list(test_dataset) assert "patients" in tables def test_get_table_info(self, test_dataset, temp_db): """Test getting table schema info.""" backend = DuckDBBackend(db_path_override=temp_db) result = backend.get_table_info("patients", test_dataset) assert result.success is True # PRAGMA table_info returns column metadata assert "subject_id" in result.data or "name" in result.data def test_get_table_info_not_found(self, test_dataset, temp_db): """Test getting info for non-existent table.""" backend = DuckDBBackend(db_path_override=temp_db) # Should raise or return error with pytest.raises(TableNotFoundError): backend.get_table_info("nonexistent_table", test_dataset) def test_get_sample_data(self, test_dataset, temp_db): """Test getting sample data from table.""" backend = DuckDBBackend(db_path_override=temp_db) result = backend.get_sample_data("patients", test_dataset, limit=2) assert result.success is True # Should return at most 2 rows assert result.row_count <= 2 class TestDuckDBBackendInfo: """Test backend info generation.""" def test_backend_info(self, test_dataset, temp_db): """Test getting backend info.""" backend = DuckDBBackend(db_path_override=temp_db) info = backend.get_backend_info(test_dataset) assert "DuckDB" in info assert test_dataset.name in info assert str(temp_db) in info def test_backend_info_missing_db(self, test_dataset): """Test backend info when database path can't be determined.""" backend = DuckDBBackend() with patch( "m4.core.backends.duckdb.get_default_database_path" ) as mock_get_path: mock_get_path.return_value = None info = backend.get_backend_info(test_dataset) assert "DuckDB" in info assert "unknown" in info class TestDuckDBResultTruncation: """Test result truncation for large result sets.""" def test_large_result_truncated(self, test_dataset): """Test that large results are truncated.""" import duckdb with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "large_test.duckdb" # Create a database with many rows conn = duckdb.connect(str(db_path)) conn.execute("CREATE TABLE big_table (id INTEGER, value VARCHAR)") # Insert 100 rows for i in range(100): conn.execute(f"INSERT INTO big_table VALUES ({i}, 'value_{i}')") conn.close() backend = DuckDBBackend(db_path_override=db_path) result = backend.execute_query("SELECT * FROM big_table", test_dataset) assert result.success is True assert result.row_count == 100 assert result.truncated is True assert "showing first 50" in result.data.lower() class TestDuckDBEdgeCases: """Test edge cases and boundary conditions for DuckDB backend.""" def test_execute_query_with_null_values(self, test_dataset): """Test handling of NULL values in results.""" import duckdb with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "nulls.duckdb" conn = duckdb.connect(str(db_path)) conn.execute( """ CREATE TABLE with_nulls ( id INTEGER, name VARCHAR, value FLOAT ) """ ) conn.execute( """ INSERT INTO with_nulls VALUES (1, 'test', NULL), (2, NULL, 3.14), (NULL, 'another', 2.71) """ ) conn.close() backend = DuckDBBackend(db_path_override=db_path) result = backend.execute_query("SELECT * FROM with_nulls", test_dataset) assert result.success is True assert result.row_count == 3 # NULL values should be represented in output assert ( "None" in result.data or "NaN" in result.data or "<NA>" in result.data ) def test_execute_query_with_unicode(self, test_dataset): """Test handling of unicode characters.""" import duckdb with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "unicode.duckdb" conn = duckdb.connect(str(db_path)) conn.execute("CREATE TABLE unicode_test (name VARCHAR)") conn.execute("INSERT INTO unicode_test VALUES ('Test'), (''), ('Emoji')") conn.close() backend = DuckDBBackend(db_path_override=db_path) result = backend.execute_query("SELECT * FROM unicode_test", test_dataset) assert result.success is True assert result.row_count == 3 def test_execute_query_with_very_long_string(self, test_dataset): """Test handling of very long string values.""" import duckdb with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "longstring.duckdb" conn = duckdb.connect(str(db_path)) conn.execute("CREATE TABLE long_strings (content VARCHAR)") long_string = "A" * 10000 # 10KB string conn.execute(f"INSERT INTO long_strings VALUES ('{long_string}')") conn.close() backend = DuckDBBackend(db_path_override=db_path) result = backend.execute_query( "SELECT LENGTH(content) as len FROM long_strings", test_dataset ) assert result.success is True assert "10000" in result.data def test_execute_query_with_special_column_names(self, test_dataset): """Test handling of column names with special characters.""" import duckdb with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "special.duckdb" conn = duckdb.connect(str(db_path)) conn.execute( """ CREATE TABLE special_cols ( "column with spaces" INTEGER, "column-with-dashes" INTEGER, normal_column INTEGER ) """ ) conn.execute( """ INSERT INTO special_cols VALUES (1, 2, 3) """ ) conn.close() backend = DuckDBBackend(db_path_override=db_path) result = backend.execute_query("SELECT * FROM special_cols", test_dataset) assert result.success is True assert result.row_count == 1 def test_get_sample_data_limit_sanitization(self, test_dataset, temp_db): """Test that limit is properly sanitized for sample data.""" backend = DuckDBBackend(db_path_override=temp_db) # Test negative limit (should be clamped to 1) result = backend.get_sample_data("patients", test_dataset, limit=-5) assert result.row_count <= 1 # Test excessive limit (should be clamped to 100) result = backend.get_sample_data("patients", test_dataset, limit=1000) assert result.row_count <= 100 def test_get_table_list_empty_database(self, test_dataset): """Test get_table_list on database with no tables. Note: The implementation returns ['No results found'] when there are no tables, rather than an empty list. This is intentional for user-friendly messaging. """ import duckdb with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "empty.duckdb" conn = duckdb.connect(str(db_path)) conn.close() backend = DuckDBBackend(db_path_override=db_path) tables = backend.get_table_list(test_dataset) # Implementation returns message rather than empty list assert tables == ["No results found"] or tables == [] def test_concurrent_read_operations(self, test_dataset, temp_db): """Test that concurrent read operations work correctly.""" import concurrent.futures backend = DuckDBBackend(db_path_override=temp_db) def execute_query(): return backend.execute_query("SELECT * FROM patients", test_dataset) # Run 5 concurrent queries with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: futures = [executor.submit(execute_query) for _ in range(5)] results = [f.result() for f in concurrent.futures.as_completed(futures)] # All should succeed for result in results: assert result.success is True assert result.row_count == 3 def test_query_with_aggregate_functions(self, test_dataset, temp_db): """Test queries with aggregate functions.""" backend = DuckDBBackend(db_path_override=temp_db) result = backend.execute_query( "SELECT COUNT(*) as cnt, AVG(anchor_age) as avg_age FROM patients", test_dataset, ) assert result.success is True assert "cnt" in result.data or "count" in result.data.lower() def test_query_with_window_functions(self, test_dataset, temp_db): """Test queries with window functions.""" backend = DuckDBBackend(db_path_override=temp_db) result = backend.execute_query( """ SELECT subject_id, gender, ROW_NUMBER() OVER (ORDER BY subject_id) as row_num FROM patients """, test_dataset, ) assert result.success is True assert "row_num" in result.data def test_query_syntax_error(self, test_dataset, temp_db): """Test handling of SQL syntax errors.""" backend = DuckDBBackend(db_path_override=temp_db) result = backend.execute_query( "SELCT * FROM patients", # Typo in SELECT test_dataset, ) assert result.success is False assert result.error is not None

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/hannesill/m4'

If you have feedback or need assistance with the MCP directory API, please join our Discord server