Skip to main content
Glama
santoshray02

CSV Editor

by santoshray02

get_column_statistics

Analyze CSV column data to calculate statistics like mean, median, and distribution for data validation and insights.

Instructions

Get detailed statistics for a specific column.

Input Schema

Table | JSON Schema

| Name       | Required | Description | Default |
|------------|----------|-------------|---------|
| session_id | Yes      |             |         |
| column     | Yes      |             |         |

Implementation Reference

  • The primary handler function that implements the logic for computing detailed statistics (numeric, categorical, string) for a specific column in a CSV session's dataframe.
    async def get_column_statistics(
        session_id: str,
        column: str,
        ctx: Context = None
    ) -> Dict[str, Any]:
        """
        Get detailed statistics for a specific column.

        Every column gets its dtype plus total/null/unique counts and
        percentages. Numeric (non-boolean) columns additionally get
        descriptive statistics (mean, median, mode, spread, quartiles, sign
        counts). All other columns, including booleans, get frequency
        information, and string-like columns also get length statistics.

        Statistics that are undefined for the data (for example ``std`` of a
        single value, or any statistic of an all-null column) are reported as
        ``None`` rather than NaN so the payload stays JSON-serializable.

        Args:
            session_id: Session identifier
            column: Column name to analyze
            ctx: FastMCP context (not used by this function)

        Returns:
            ``{"success": True, "statistics": {...}}`` on success, otherwise
            ``{"success": False, "error": "<message>"}``. Exceptions are
            logged and reported through the error payload, never raised.
        """
        def _as_float(value: Any) -> Any:
            """Convert a pandas/numpy scalar to float, mapping NaN/NA to None."""
            return None if pd.isna(value) else float(value)

        def _percentage(part: int, whole: int) -> float:
            """Return part/whole as a percentage, or 0.0 for an empty whole."""
            return round(part / whole * 100, 2) if whole else 0.0

        try:
            manager = get_session_manager()
            session = manager.get_session(session_id)

            if not session or session.df is None:
                return {"success": False, "error": "Invalid session or no data loaded"}

            df = session.df

            if column not in df.columns:
                return {"success": False, "error": f"Column '{column}' not found"}

            col_data = df[column]

            # Compute each aggregate once and reuse it for count + percentage.
            total_count = len(col_data)
            null_count = int(col_data.isna().sum())
            unique_count = int(col_data.nunique())

            result = {
                "column": column,
                "dtype": str(col_data.dtype),
                "total_count": total_count,
                "null_count": null_count,
                "null_percentage": _percentage(null_count, total_count),
                "unique_count": unique_count,
                "unique_percentage": _percentage(unique_count, total_count)
            }

            # pandas reports bool as numeric, but subtraction and quantiles are
            # not defined for booleans, so bool columns are handled as
            # categorical data instead.
            is_numeric = (
                pd.api.types.is_numeric_dtype(col_data)
                and not pd.api.types.is_bool_dtype(col_data)
            )

            # Numeric column statistics
            if is_numeric:
                non_null = col_data.dropna()
                modes = non_null.mode()
                minimum = non_null.min()
                maximum = non_null.max()
                q1 = non_null.quantile(0.25)
                q2 = non_null.quantile(0.50)
                q3 = non_null.quantile(0.75)

                result.update({
                    "type": "numeric",
                    "mean": _as_float(non_null.mean()),
                    "median": _as_float(non_null.median()),
                    "mode": _as_float(modes.iloc[0]) if len(modes) > 0 else None,
                    "std": _as_float(non_null.std()),
                    "variance": _as_float(non_null.var()),
                    "min": _as_float(minimum),
                    "max": _as_float(maximum),
                    "range": _as_float(maximum - minimum),
                    "sum": _as_float(non_null.sum()),
                    "skewness": _as_float(non_null.skew()),
                    "kurtosis": _as_float(non_null.kurt()),
                    "25%": _as_float(q1),
                    "50%": _as_float(q2),
                    "75%": _as_float(q3),
                    "iqr": _as_float(q3 - q1),
                    # Nulls never satisfy a comparison, so counting on the
                    # non-null values gives the same result as the full column.
                    "zero_count": int((non_null == 0).sum()),
                    "positive_count": int((non_null > 0).sum()),
                    "negative_count": int((non_null < 0).sum())
                })

            # Categorical column statistics
            else:
                value_counts = col_data.value_counts()
                has_values = len(value_counts) > 0

                result.update({
                    "type": "categorical",
                    "most_frequent": str(value_counts.index[0]) if has_values else None,
                    "most_frequent_count": int(value_counts.iloc[0]) if has_values else 0,
                    "top_10_values": {
                        str(k): int(v) for k, v in value_counts.head(10).items()
                    }
                })

                # String-specific stats. Besides plain object columns, also
                # cover pandas' dedicated string dtypes, which do not compare
                # equal to 'object'.
                if col_data.dtype == 'object' or pd.api.types.is_string_dtype(col_data):
                    str_data = col_data.dropna().astype(str)
                    if len(str_data) > 0:
                        str_lengths = str_data.str.len()
                        result["string_stats"] = {
                            "min_length": int(str_lengths.min()),
                            "max_length": int(str_lengths.max()),
                            "mean_length": round(float(str_lengths.mean()), 2),
                            "empty_string_count": int((str_data == "").sum())
                        }

            # Only successful analyses are recorded in the session history.
            session.record_operation(OperationType.ANALYZE, {
                "type": "column_statistics",
                "column": column
            })

            return {
                "success": True,
                "statistics": result
            }

        except Exception as e:
            # Tool boundary: report the failure to the caller instead of raising.
            logger.error("Error getting column statistics: %s", e)
            return {"success": False, "error": str(e)}
  • Registers the 'get_column_statistics' tool with the FastMCP server using the @mcp.tool decorator. This wrapper function delegates execution to the actual implementation imported as _get_column_statistics from analytics.py.
    @mcp.tool
    async def get_column_statistics(
        session_id: str,
        column: str,
        ctx: Context = None
    ) -> Dict[str, Any]:
        """Get detailed statistics for a specific column."""
        # Thin registration wrapper: all of the work happens in the
        # implementation imported as _get_column_statistics.
        statistics_response = await _get_column_statistics(session_id, column, ctx)
        return statistics_response

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/santoshray02/csv-editor'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.