get_statistics
Generate statistical summaries for numerical columns in CSV files, including percentiles, to analyze and interpret data efficiently using the MCP server.
Instructions
Get statistical summary of numerical columns.
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
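| columns | No | Specific columns to analyze (None for all numeric) | None |
| include_percentiles | No | Include percentile values | True |
| session_id | Yes | Session identifier | |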
Implementation Reference
# src/csv_editor/tools/analytics.py:16-97 (handler)
# Core handler function that executes the get_statistics tool logic: retrieves
# CSV session data, identifies numeric columns, computes comprehensive
# statistics (mean, std, min, max, percentiles, skewness, kurtosis), and
# returns structured results.
async def get_statistics(
    session_id: str,
    columns: Optional[List[str]] = None,
    include_percentiles: bool = True,
    ctx: Context = None
) -> Dict[str, Any]:
    """
    Get statistical summary of numerical columns.

    Args:
        session_id: Session identifier
        columns: Specific columns to analyze (None for all numeric).
            Requested columns that are not numeric are skipped; repeated
            names are analyzed once.
        include_percentiles: Include percentile values ("25%", "50%",
            "75%") and the interquartile range ("iqr")
        ctx: FastMCP context (not used by this function)

    Returns:
        On success: {"success": True, "statistics": {column: {...}},
        "columns_analyzed": [...], "total_rows": int}.
        On failure: {"success": False, "error": message}. Exceptions are
        logged and reported through this dict rather than raised.
    """
    try:
        manager = get_session_manager()
        session = manager.get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df

        # Select columns to analyze
        if columns:
            missing_cols = [col for col in columns if col not in df.columns]
            if missing_cols:
                return {"success": False, "error": f"Columns not found: {missing_cols}"}
            # Drop repeated names (order preserved): selecting the same label
            # twice would make numeric_df[col] a DataFrame instead of a Series
            # and break the scalar conversions below.
            requested = list(dict.fromkeys(columns))
            numeric_df = df[requested].select_dtypes(include=[np.number])
        else:
            numeric_df = df.select_dtypes(include=[np.number])

        # NOTE: DataFrame.empty is also True when the frame has zero rows, so
        # a session with numeric columns but no data reports this same error.
        if numeric_df.empty:
            return {"success": False, "error": "No numeric columns found"}

        # Calculate statistics
        quartiles = [0.25, 0.5, 0.75]
        stats = {}

        for col in numeric_df.columns:
            series = numeric_df[col]
            col_data = series.dropna()

            # Statistics that are undefined for the available data may come
            # back from pandas as NaN; they are passed through unchanged.
            col_stats = {
                "count": int(col_data.count()),
                "null_count": int(series.isna().sum()),
                "mean": float(col_data.mean()),
                "std": float(col_data.std()),
                "min": float(col_data.min()),
                "max": float(col_data.max()),
                "sum": float(col_data.sum()),
                "variance": float(col_data.var()),
                "skewness": float(col_data.skew()),
                "kurtosis": float(col_data.kurt())
            }

            if include_percentiles:
                # One quantile call for all three quartiles.
                q25, q50, q75 = (float(q) for q in col_data.quantile(quartiles))
                col_stats["25%"] = q25
                col_stats["50%"] = q50
                col_stats["75%"] = q75
                col_stats["iqr"] = q75 - q25

            stats[col] = col_stats

        session.record_operation(OperationType.ANALYZE, {
            "type": "statistics",
            "columns": list(stats.keys())
        })

        return {
            "success": True,
            "statistics": stats,
            "columns_analyzed": list(stats.keys()),
            "total_rows": len(df)
        }

    except Exception as e:
        logger.error("Error getting statistics: %s", e)
        return {"success": False, "error": str(e)}
# src/csv_editor/server.py:318-326 (registration)
# MCP tool registration point using @mcp.tool decorator. This thin wrapper
# delegates to the core implementation in analytics.py via imported
# _get_statistics.
async def get_statistics(
    session_id: str,
    columns: Optional[List[str]] = None,
    include_percentiles: bool = True,
    ctx: Context = None
) -> Dict[str, Any]:
    """Get statistical summary of numerical columns."""
    # The docstring above is kept verbatim: it matches the tool's published
    # "Instructions" text, so it is presumably surfaced to clients — confirm
    # before rewording it.
    # Forward every argument unchanged, in the same positional order.
    summary = await _get_statistics(session_id, columns, include_percentiles, ctx)
    return summary