Skip to main content
Glama
test_data_operations.py5.93 kB
"""Unit tests for data_operations.py. Tests for create_data_preview_with_indices function which is used by servers to generate preview data for display to users. """ from __future__ import annotations import numpy as np import pandas as pd import pytest from databeak.services.data_operations import create_data_preview_with_indices class TestCreateDataPreviewWithIndices: """Test create_data_preview_with_indices function with various data types.""" def test_basic_dataframe(self) -> None: """Test with simple dataframe.""" df = pd.DataFrame( { "name": ["Alice", "Bob", "Charlie"], "age": [30, 25, 35], "salary": [60000.0, 50000.0, 70000.0], }, ) result = create_data_preview_with_indices(df, 2) assert result["total_rows"] == 3 assert result["total_columns"] == 3 assert result["preview_rows"] == 2 assert result["columns"] == ["name", "age", "salary"] assert len(result["records"]) == 2 # Check first record structure record = result["records"][0] assert record["__row_index__"] == 0 assert record["name"] == "Alice" assert record["age"] == 30 assert record["salary"] == 60000.0 def test_with_nan_values(self) -> None: """Test handling of NaN values.""" df = pd.DataFrame({"col1": [1, np.nan, 3], "col2": ["a", "b", np.nan]}) result = create_data_preview_with_indices(df, 3) records = result["records"] assert records[1]["col1"] is None # NaN converted to None assert records[2]["col2"] is None # NaN converted to None def test_with_timestamp_values(self) -> None: """Test handling of pandas Timestamp objects.""" timestamps = pd.to_datetime(["2023-01-01", "2023-01-02", "2023-01-03"]) df = pd.DataFrame({"date": timestamps, "value": [1, 2, 3]}) result = create_data_preview_with_indices(df, 2) records = result["records"] # Timestamps should be converted to strings assert isinstance(records[0]["date"], str) assert "2023-01-01" in records[0]["date"] def test_with_numpy_types(self) -> None: """Test handling of numpy scalar types.""" df = pd.DataFrame( { "int_col": np.array([1, 2, 3], dtype=np.int64), "float_col": np.array([1.1, 2.2, 3.3], dtype=np.float64), "bool_col": np.array([True, False, True], dtype=bool), }, ) result = create_data_preview_with_indices(df, 2) records = result["records"] # Check that numpy types are converted using .item() assert isinstance(records[0]["int_col"], int) assert isinstance(records[0]["float_col"], float) assert isinstance(records[0]["bool_col"], bool) def test_with_non_integer_index(self) -> None: """Test handling of non-integer row indices.""" df = pd.DataFrame( {"col1": [1, 2, 3], "col2": ["a", "b", "c"]}, index=["row1", "row2", "row3"], ) result = create_data_preview_with_indices(df, 2) records = result["records"] # Non-integer indices should default to 0 assert records[0]["__row_index__"] == 0 assert records[1]["__row_index__"] == 0 def test_with_complex_column_names(self) -> None: """Test handling of complex column names.""" df = pd.DataFrame( { 123: [1, 2, 3], # Numeric column name "spaced column": ["a", "b", "c"], # Spaced column name ("tuple", "col"): [True, False, True], # Tuple column name }, ) result = create_data_preview_with_indices(df, 2) records = result["records"] # All column names should be converted to strings assert "123" in records[0] assert "spaced column" in records[0] assert "('tuple', 'col')" in records[0] def test_empty_dataframe(self) -> None: """Test with empty dataframe.""" df = pd.DataFrame() result = create_data_preview_with_indices(df, 5) assert result["total_rows"] == 0 assert result["total_columns"] == 0 assert result["preview_rows"] == 0 assert result["records"] == [] assert result["columns"] == [] def test_more_rows_requested_than_available(self) -> None: """Test when requesting more preview rows than available.""" df = pd.DataFrame({"col1": [1, 2]}) result = create_data_preview_with_indices(df, 10) assert result["total_rows"] == 2 assert result["preview_rows"] == 2 assert len(result["records"]) == 2 def test_zero_preview_rows(self) -> None: """Test with zero preview rows requested.""" df = pd.DataFrame({"col1": [1, 2, 3]}) result = create_data_preview_with_indices(df, 0) assert result["total_rows"] == 3 assert result["preview_rows"] == 0 assert len(result["records"]) == 0 @pytest.mark.parametrize( ("special_value", "expected"), [ (np.inf, float("inf")), (-np.inf, float("-inf")), ], ) def test_special_values(self, special_value: float, expected: float) -> None: """Test handling of special pandas/numpy values.""" df = pd.DataFrame({"col": [1, special_value, 3]}) result = create_data_preview_with_indices(df, 3) record_value = result["records"][1]["col"] assert record_value == expected def test_pd_na_values(self) -> None: """Test handling of pandas NA values separately.""" df = pd.DataFrame({"col": [1, pd.NA, 3]}) result = create_data_preview_with_indices(df, 3) record_value = result["records"][1]["col"] assert record_value is None

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jonpspri/databeak'

If you have feedback or need assistance with the MCP directory API, please join our Discord server