Databento MCP

test_data_quality.py•7.35 KiB

"""Tests for the data_quality module.""" import pytest import pandas as pd import numpy as np from datetime import datetime, timedelta from databento_mcp.data_quality import ( DataQualityReport, analyze_data_quality, detect_time_gaps, detect_price_outliers, detect_null_values, detect_duplicates, get_quality_score_explanation, ) class TestDataQualityReport: """Tests for DataQualityReport class.""" def test_to_string_excellent_score(self): """Test string output for excellent score.""" report = DataQualityReport(score=95, record_count=1000) result = report.to_string() assert "Quality Score: 95/100" in result assert "Excellent" in result def test_to_string_with_issues(self): """Test string output with issues.""" report = DataQualityReport( score=60, record_count=1000, issues=["Critical column missing"], warnings=["Minor issue found"], ) result = report.to_string() assert "Issues Found" in result assert "Warnings" in result assert "Critical column missing" in result def test_to_string_with_time_gaps(self): """Test string output with time gaps.""" report = DataQualityReport( score=80, record_count=1000, time_gaps=[ {"start": "2024-01-15 10:00:00", "end": "2024-01-15 10:05:00", "duration": "0:05:00"} ], ) result = report.to_string() assert "Time Gaps" in result def test_to_string_with_outliers(self): """Test string output with price outliers.""" report = DataQualityReport( score=75, record_count=1000, price_outliers=[ {"price": 999.99, "deviation": 5.5} ], ) result = report.to_string() assert "Price Outliers" in result class TestAnalyzeDataQuality: """Tests for analyze_data_quality function.""" def test_empty_dataframe(self): """Test analysis of empty DataFrame.""" df = pd.DataFrame() report = analyze_data_quality(df) assert report.score == 0 assert len(report.issues) > 0 def test_perfect_data(self): """Test analysis of perfect data.""" df = pd.DataFrame({ "ts_event": pd.date_range("2024-01-15 09:30:00", periods=100, freq="1s"), "price": np.linspace(100, 101, 100), "size": np.ones(100) * 10, }) report = analyze_data_quality(df, "trades") assert report.score >= 80 # Should be high quality def test_data_with_duplicates(self): """Test analysis of data with duplicates.""" df = pd.DataFrame({ "price": [100.0, 100.0, 100.0, 101.0, 102.0], "size": [10, 10, 10, 20, 30], }) report = analyze_data_quality(df, "trades") assert report.duplicate_count > 0 def test_data_with_null_values(self): """Test analysis of data with null values.""" df = pd.DataFrame({ "price": [100.0, None, 102.0], "size": [10, 20, None], }) report = analyze_data_quality(df, "trades") assert len(report.null_columns) > 0 class TestDetectTimeGaps: """Tests for detect_time_gaps function.""" def test_no_timestamp_column(self): """Test with no timestamp column.""" df = pd.DataFrame({"price": [100, 101, 102]}) gaps = detect_time_gaps(df) assert len(gaps) == 0 def test_continuous_timestamps(self): """Test with continuous timestamps (no gaps).""" df = pd.DataFrame({ "ts_event": pd.date_range("2024-01-15 09:30:00", periods=10, freq="1s") }) gaps = detect_time_gaps(df, threshold_seconds=60) assert len(gaps) == 0 def test_gap_detected(self): """Test that gaps are detected.""" timestamps = pd.to_datetime([ "2024-01-15 09:30:00", "2024-01-15 09:30:01", "2024-01-15 09:35:00", # 5 minute gap "2024-01-15 09:35:01", ]) df = pd.DataFrame({"ts_event": timestamps}) gaps = detect_time_gaps(df, threshold_seconds=60) assert len(gaps) == 1 assert gaps[0]["seconds"] > 60 class TestDetectPriceOutliers: """Tests for detect_price_outliers function.""" def test_no_outliers(self): """Test data with no outliers.""" df = pd.DataFrame({ "price": np.linspace(100, 101, 100) # Smooth price range }) outliers = detect_price_outliers(df) assert len(outliers) == 0 def test_outliers_detected(self): """Test that outliers are detected.""" prices = [100.0] * 50 + [999.0] + [100.0] * 50 # One obvious outlier df = pd.DataFrame({"price": prices}) outliers = detect_price_outliers(df, std_threshold=3.0) assert len(outliers) > 0 assert any(o["price"] == 999.0 for o in outliers) def test_no_price_column(self): """Test with no price column.""" df = pd.DataFrame({"other": [1, 2, 3]}) outliers = detect_price_outliers(df) assert len(outliers) == 0 class TestDetectNullValues: """Tests for detect_null_values function.""" def test_no_nulls(self): """Test data with no null values.""" df = pd.DataFrame({ "a": [1, 2, 3], "b": [4, 5, 6], }) null_cols = detect_null_values(df) assert len(null_cols) == 0 def test_nulls_detected(self): """Test that null values are detected.""" df = pd.DataFrame({ "a": [1, None, 3], "b": [4, 5, 6], "c": [None, None, None], }) null_cols = detect_null_values(df) assert len(null_cols) == 2 assert "a" in null_cols assert "c" in null_cols class TestDetectDuplicates: """Tests for detect_duplicates function.""" def test_no_duplicates(self): """Test data with no duplicates.""" df = pd.DataFrame({ "a": [1, 2, 3], "b": [4, 5, 6], }) dup_count = detect_duplicates(df) assert dup_count == 0 def test_duplicates_detected(self): """Test that duplicates are detected.""" df = pd.DataFrame({ "a": [1, 1, 2], "b": [4, 4, 5], }) dup_count = detect_duplicates(df) assert dup_count == 1 class TestGetQualityScoreExplanation: """Tests for get_quality_score_explanation function.""" def test_excellent_score(self): """Test explanation for excellent score.""" explanation = get_quality_score_explanation(95) assert "Excellent" in explanation def test_good_score(self): """Test explanation for good score.""" explanation = get_quality_score_explanation(75) assert "Good" in explanation def test_fair_score(self): """Test explanation for fair score.""" explanation = get_quality_score_explanation(55) assert "Fair" in explanation def test_poor_score(self): """Test explanation for poor score.""" explanation = get_quality_score_explanation(30) assert "Poor" in explanation def test_very_poor_score(self): """Test explanation for very poor score.""" explanation = get_quality_score_explanation(10) assert "Very poor" in explanation

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/deepentropy/databento-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_data_quality.py•7.35 KiB