Skip to main content
Glama
dataframe_utils.py (7.7 kB)
"""DataFrame compatibility utilities for Sherlog MCP.

This module provides a bridge between polars and pandas DataFrames, allowing
the codebase to use polars for performance while maintaining pandas
compatibility.
"""

import json
import logging
from io import StringIO
from typing import Any

import numpy as np
import pandas as pd
import polars as pl

logger = logging.getLogger(__name__)

DataFrame = pl.DataFrame
LazyFrame = pl.LazyFrame


def to_polars(df: pd.DataFrame | pl.DataFrame | Any) -> pl.DataFrame:
    """Convert a DataFrame to polars format.

    Args:
        df: Input DataFrame (pandas or polars).

    Returns:
        pl.DataFrame: The input itself when it is already a polars DataFrame,
        otherwise the result of ``pl.from_pandas``.

    Raises:
        ValueError: If ``df`` is neither a pandas nor a polars DataFrame.
    """
    if isinstance(df, pl.DataFrame):
        return df
    if isinstance(df, pd.DataFrame):
        return pl.from_pandas(df)
    raise ValueError(f"Cannot convert {type(df)} to polars DataFrame")


def to_pandas(df: pd.DataFrame | pl.DataFrame | Any) -> pd.DataFrame:
    """Convert a DataFrame to pandas format.

    Args:
        df: Input DataFrame (pandas or polars).

    Returns:
        pd.DataFrame: The input itself when it is already a pandas DataFrame,
        otherwise the result of ``df.to_pandas()``.

    Raises:
        ValueError: If ``df`` is neither a pandas nor a polars DataFrame.
    """
    if isinstance(df, pd.DataFrame):
        return df
    if isinstance(df, pl.DataFrame):
        return df.to_pandas()
    raise ValueError(f"Cannot convert {type(df)} to pandas DataFrame")


def create_dataframe_polars(data: Any) -> pl.DataFrame:
    """Create a polars DataFrame from various data formats.

    Supported inputs, tried in this order: ``None`` (empty frame), polars and
    pandas DataFrames, a non-empty list of dicts, a non-empty list of lists
    (first inner list used as the header when there is more than one), a dict
    of equal-length lists, a flat dict of scalars (one row), a numpy array,
    and a string (parsed as JSON, then as CSV, then wrapped in a single
    ``"result"`` cell).

    Args:
        data: Input data in various formats.

    Returns:
        pl.DataFrame: Created DataFrame. An empty DataFrame is returned (and
        a warning logged) when no conversion applies.
    """
    if data is None:
        return pl.DataFrame()

    if isinstance(data, pl.DataFrame):
        return data

    if isinstance(data, pd.DataFrame):
        return pl.from_pandas(data)

    if isinstance(data, list) and len(data) > 0:
        if all(isinstance(item, dict) for item in data):
            return pl.DataFrame(data)
        if all(isinstance(item, list) for item in data):
            if len(data) > 1:
                try:
                    # data[0] is the header and data[1:] are records, so the
                    # orientation is stated explicitly instead of being
                    # inferred; this mirrors the row-wise pandas variant.
                    return pl.DataFrame(data[1:], schema=data[0], orient="row")
                except Exception:
                    # Header unusable as a schema (or ragged rows): fall back
                    # to the un-headered construction.
                    return pl.DataFrame(data)
            return pl.DataFrame(data)

    if isinstance(data, dict):
        if all(isinstance(v, list) for v in data.values()):
            lengths = {len(v) for v in data.values()}
            if len(lengths) == 1:
                return pl.DataFrame(data)

        if all(not isinstance(v, (list, dict)) for v in data.values()):
            return pl.DataFrame([data])

    if isinstance(data, np.ndarray):
        return pl.DataFrame(data)

    if isinstance(data, str):
        # Best-effort chain: each attempt is allowed to fail and hand over to
        # the next one. Only ``Exception`` is caught so that
        # KeyboardInterrupt / SystemExit still propagate.
        try:
            json_data = json.loads(data)
            return create_dataframe_polars(json_data)
        except Exception:
            pass

        try:
            return pl.read_csv(StringIO(data))
        except Exception:
            pass

        try:
            return pl.DataFrame({"result": [data]})
        except Exception:
            pass

    logger.warning(
        "Could not convert %s to polars DataFrame, returning empty DataFrame",
        type(data),
    )
    return pl.DataFrame()


def create_dataframe_pandas(data: Any) -> pl.DataFrame | pd.DataFrame | Any:
    """Create a pandas DataFrame from various data formats.

    This is the pandas counterpart of ``create_dataframe_polars``. It tries
    the same input shapes in the same order, and additionally attempts
    ``pd.json_normalize`` for dicts that match neither dict shape.

    Args:
        data: Input data in various formats.

    Returns:
        pd.DataFrame: Created DataFrame. When no conversion applies, ``data``
        itself is returned unchanged (no empty-frame fallback here), so
        callers may receive a non-DataFrame value.
    """
    if data is None:
        return pd.DataFrame()

    if isinstance(data, pd.DataFrame):
        return data

    if isinstance(data, pl.DataFrame):
        return data.to_pandas()

    if isinstance(data, list) and len(data) > 0:
        if all(isinstance(item, dict) for item in data):
            return pd.DataFrame(data)
        if all(isinstance(item, list) for item in data):
            if len(data) > 1:
                try:
                    # First inner list is treated as the header row.
                    return pd.DataFrame(data[1:], columns=data[0])
                except Exception:
                    return pd.DataFrame(data)
            return pd.DataFrame(data)

    if isinstance(data, dict):
        if all(isinstance(v, list) for v in data.values()):
            lengths = {len(v) for v in data.values()}
            if len(lengths) == 1:
                return pd.DataFrame(data)

        if all(not isinstance(v, (list, dict)) for v in data.values()):
            return pd.DataFrame([data])

        # Remaining dict shapes (nested / mixed values): let pandas flatten.
        try:
            return pd.json_normalize(data)
        except Exception:
            pass

    if isinstance(data, np.ndarray):
        return pd.DataFrame(data)

    if isinstance(data, str):
        # Best-effort chain; see create_dataframe_polars for the rationale
        # behind catching ``Exception`` rather than using a bare ``except``.
        try:
            json_data = json.loads(data)
            return create_dataframe_pandas(json_data)
        except Exception:
            pass

        try:
            return pd.read_csv(StringIO(data))
        except Exception:
            pass

        try:
            return pd.DataFrame({"result": [data]})
        except Exception:
            pass

    return data


def smart_create_dataframe(
    data: Any, prefer_polars: bool = True
) -> pl.DataFrame | pd.DataFrame:
    """Create a DataFrame, preferring polars unless told otherwise.

    Args:
        data: Input data in various formats.
        prefer_polars: Whether to try polars first. When the polars path
            raises, a warning is logged and pandas is used instead.

    Returns:
        DataFrame: Created DataFrame (polars or pandas). On the pandas path
        the result is whatever ``create_dataframe_pandas`` returns, which can
        be the raw input when it could not be converted.
    """
    if not prefer_polars:
        return create_dataframe_pandas(data)

    try:
        return create_dataframe_polars(data)
    except Exception as e:
        logger.warning(
            "Failed to create polars DataFrame: %s, falling back to pandas", e
        )
        return create_dataframe_pandas(data)


def read_csv_smart(file_path: str, **kwargs) -> DataFrame:
    """Read a CSV file with polars.

    Args:
        file_path: Path to CSV file.
        **kwargs: Additional arguments passed through to ``pl.read_csv``.

    Returns:
        DataFrame: Loaded polars DataFrame.
    """
    return pl.read_csv(file_path, **kwargs)


def optimize_for_analytics(df: DataFrame) -> DataFrame:
    """Return ``df`` as a polars DataFrame for analytical operations.

    Args:
        df: Input DataFrame (pandas or polars).

    Returns:
        DataFrame: Result of ``to_polars(df)``.

    Raises:
        ValueError: If ``df`` is neither a pandas nor a polars DataFrame.
    """
    return to_polars(df)


def to_json_serializable(
    df: pl.DataFrame | pd.DataFrame | Any,
) -> list[dict[str, Any]] | Any:
    """Convert a DataFrame to a JSON-serializable form for MCP tool responses.

    For pandas input, +/-infinity is replaced by the strings ``'Infinity'`` /
    ``'-Infinity'``, nulls are replaced by ``None``, and the frame is
    round-tripped through ``to_json`` (ISO date format) so timestamps and
    other special dtypes become plain JSON values.

    Args:
        df: Input DataFrame or other data.

    Returns:
        A list of row dicts for DataFrames; the original object otherwise.
    """
    if isinstance(df, pd.DataFrame):
        df_clean = df.replace([np.inf, -np.inf], ['Infinity', '-Infinity'])
        df_clean = df_clean.where(pd.notnull(df_clean), None)
        try:
            return json.loads(df_clean.to_json(orient="records", date_format="iso"))
        except (ValueError, TypeError):
            # to_json could not encode the frame; hand back native records.
            return df_clean.to_dict(orient="records")

    if isinstance(df, pl.DataFrame):
        # NOTE(review): unlike the pandas branch, no cleaning is applied here;
        # confirm that values such as datetimes or NaN in polars frames are
        # handled by the caller's JSON encoder.
        return df.to_dicts()

    return df


__all__ = [
    "DataFrame",
    "LazyFrame",
    "to_polars",
    "to_pandas",
    "smart_create_dataframe",
    "read_csv_smart",
    "optimize_for_analytics",
    "to_json_serializable",
]

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/GetSherlog/Sherlog-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.