get_statistics
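Calculate statistical metrics (count, null count, mean, standard deviation, min, max, sum, variance, skewness, kurtosis, and optionally the 25th/50th/75th percentiles and IQR, where the 50th percentile is the median) for numerical columns in a CSV session.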
Instructions
Get statistical summary of numerical columns.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
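| session_id | Yes | Session identifier | |
| columns | No | Specific columns to analyze (None for all numeric) | None |
| include_percentiles | No | Include percentile values | True |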
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
No arguments | |||
Implementation Reference
- src/csv_editor/server.py:298-306 (registration): MCP tool registration via the @mcp.tool decorator; delegates to the analytics module implementation.
# MCP-facing entry point: registered through the @mcp.tool decorator and
# forwarding every argument, unchanged and in order, to the analytics
# implementation imported as `_get_statistics`.
# NOTE(review): the docstring text matches the tool's published
# "Instructions" string, so it is kept verbatim — confirm before rewording.
@mcp.tool
async def get_statistics(
    session_id: str,
    columns: list[str] | None = None,
    include_percentiles: bool = True,
    ctx: Context = None,
) -> dict[str, Any]:
    """Get statistical summary of numerical columns."""
    summary = await _get_statistics(session_id, columns, include_percentiles, ctx)
    return summary
# - src/csv_editor/tools/analytics.py:16-96 (handler) Core handler - calculates statistical summary (count, mean, std, min, max, sum, variance, skewness, kurtosis, optional percentiles) for numeric columns in a CSV session
async def get_statistics(
    session_id: str,
    columns: list[str] | None = None,
    include_percentiles: bool = True,
    ctx: Context | None = None,
) -> dict[str, Any]:
    """
    Get statistical summary of numerical columns.

    Args:
        session_id: Session identifier
        columns: Specific columns to analyze (None or empty for all numeric).
            Repeated names are analyzed once; requested columns that are not
            numeric are silently left out of the result.
        include_percentiles: Include the 25%/50%/75% quantiles and the IQR
        ctx: FastMCP context (not used by this function)

    Returns:
        On success, a dict with "success": True, "statistics" (per-column
        metrics keyed by column name), "columns_analyzed" and "total_rows".
        On failure, a dict with "success": False and an "error" message.
        Any exception raised while computing is logged and reported through
        the same failure shape instead of propagating.
    """
    try:
        manager = get_session_manager()
        session = manager.get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df

        # Select columns to analyze
        if columns:
            # De-duplicate while keeping request order: selecting the same
            # label twice would make numeric_df[col] a DataFrame and break
            # the scalar conversions below.
            requested = list(dict.fromkeys(columns))
            missing_cols = [col for col in requested if col not in df.columns]
            if missing_cols:
                return {"success": False, "error": f"Columns not found: {missing_cols}"}
            numeric_df = df[requested].select_dtypes(include=[np.number])
        else:
            numeric_df = df.select_dtypes(include=[np.number])

        # DataFrame.empty is True when there are no columns OR no rows.
        if numeric_df.empty:
            return {"success": False, "error": "No numeric columns found"}

        # Calculate statistics
        stats = {}
        percentiles = [0.25, 0.5, 0.75]

        for col in numeric_df.columns:
            column = numeric_df[col]
            col_data = column.dropna()

            col_stats = {
                "count": int(col_data.count()),
                "null_count": int(column.isna().sum()),
                "mean": float(col_data.mean()),
                "std": float(col_data.std()),
                "min": float(col_data.min()),
                "max": float(col_data.max()),
                "sum": float(col_data.sum()),
                "variance": float(col_data.var()),
                "skewness": float(col_data.skew()),
                "kurtosis": float(col_data.kurt()),
            }

            if include_percentiles:
                # One quantile call for all three cut points instead of three
                # separate passes over the column.
                q25, q50, q75 = (float(q) for q in col_data.quantile(percentiles))
                col_stats["25%"] = q25
                col_stats["50%"] = q50
                col_stats["75%"] = q75
                col_stats["iqr"] = q75 - q25

            stats[col] = col_stats

        session.record_operation(
            OperationType.ANALYZE, {"type": "statistics", "columns": list(stats.keys())}
        )

        return {
            "success": True,
            "statistics": stats,
            "columns_analyzed": list(stats.keys()),
            "total_rows": len(df),
        }

    except Exception as e:
        # Tool boundary: report the failure to the caller rather than raising.
        logger.error(f"Error getting statistics: {e!s}")
        return {"success": False, "error": str(e)}
# - src/csv_editor/server.py:81-87 (registration) Capabilities listing that includes 'get_statistics' in the data_analysis group
"get_statistics", "correlation_matrix", "group_by_aggregate", "value_counts", "detect_outliers", "profile_data", ], - src/csv_editor/server.py:292-292 (registration)Import alias for the analytics module's implementation
from .tools.analytics import get_statistics as _get_statistics - Separate get_statistics method on HistoryManager - returns history/undo/redo statistics, not column data statistics
def get_statistics(self) -> dict[str, Any]:
    """Get history statistics.

    Returns:
        A dict that carries the same keys whether or not any operation has
        been recorded: "total_operations", "current_position", "can_undo",
        "can_redo", "redo_stack_size", "operation_types" (count per
        operation type), "first_operation" / "last_operation" (ISO-8601
        timestamps, or None when the history is empty), "snapshots_count",
        "storage_type" and "max_history".
    """
    # Treat a falsy history (empty or None) as "no entries", as before.
    history = self.history or []

    # Count operation types and entries that carry a data snapshot.
    operation_types: dict[Any, int] = {}
    snapshots_count = 0
    for entry in history:
        operation_types[entry.operation_type] = (
            operation_types.get(entry.operation_type, 0) + 1
        )
        if entry.data_snapshot is not None:
            snapshots_count += 1

    first_operation = history[0].timestamp.isoformat() if history else None
    last_operation = history[-1].timestamp.isoformat() if history else None

    # Single return path so callers can rely on every key being present,
    # including on a history with no recorded operations.
    return {
        "total_operations": len(history),
        "current_position": self.current_index + 1,
        "can_undo": self.can_undo(),
        "can_redo": self.can_redo(),
        "redo_stack_size": len(self.redo_stack),
        "operation_types": operation_types,
        "first_operation": first_operation,
        "last_operation": last_operation,
        "snapshots_count": snapshots_count,
        "storage_type": self.storage_type.value,
        "max_history": self.max_history,
    }