get_correlation_matrix
Compute pairwise correlations between the numeric columns of your CSV using the Pearson (default), Spearman, or Kendall method, with an optional minimum-correlation filter.
Instructions
Calculate correlation matrix for numeric columns.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| session_id | Yes | | |
| method | No | | pearson |
| columns | No | | |
| min_correlation | No | | |
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
| No arguments | | | |
Implementation Reference
- Core handler: calculates correlation matrix for numeric columns using pandas corr() with pearson/spearman/kendall methods, optionally filters by columns and min_correlation threshold, and returns correlation matrix plus highly correlated pairs.
async def get_correlation_matrix(
    session_id: str,
    method: str = "pearson",
    columns: list[str] | None = None,
    min_correlation: float | None = None,
    ctx: Context = None,
) -> dict[str, Any]:
    """
    Calculate correlation matrix for numeric columns.

    Args:
        session_id: Session identifier
        method: Correlation method ('pearson', 'spearman', 'kendall')
        columns: Specific columns to include (None or empty for all numeric).
            Repeated names are collapsed to their first occurrence, and
            non-numeric columns in the selection are ignored.
        min_correlation: When given, an off-diagonal entry is kept in the
            matrix only if its absolute value is >= this threshold. Diagonal
            entries are always kept.
        ctx: FastMCP context (not used by this function)

    Returns:
        On success, a dict with "success": True, "method",
        "correlation_matrix" (nested dict of values rounded to 4 decimals,
        NaN entries omitted), "high_correlations" (pairs with
        |correlation| >= 0.7, strongest first) and "columns_analyzed".
        On failure, {"success": False, "error": <message>}.
    """
    # Pairs at or above this absolute correlation are reported separately.
    high_threshold = 0.7

    try:
        manager = get_session_manager()
        session = manager.get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df

        # Select columns
        if columns:
            # Drop repeated names (keeping order): duplicates would give the
            # correlation matrix duplicate labels, so a label lookup would
            # return a sub-frame instead of a scalar and break the checks below.
            selected = list(dict.fromkeys(columns))
            missing_cols = [col for col in selected if col not in df.columns]
            if missing_cols:
                return {"success": False, "error": f"Columns not found: {missing_cols}"}
            numeric_df = df[selected].select_dtypes(include=[np.number])
        else:
            numeric_df = df.select_dtypes(include=[np.number])

        if numeric_df.empty:
            return {"success": False, "error": "No numeric columns found"}

        if len(numeric_df.columns) < 2:
            return {"success": False, "error": "Need at least 2 numeric columns for correlation"}

        # Calculate correlation
        if method not in ["pearson", "spearman", "kendall"]:
            return {"success": False, "error": f"Invalid method: {method}"}

        corr_matrix = numeric_df.corr(method=method)
        labels = list(corr_matrix.columns)
        # Positional access always yields scalars and avoids repeated label lookups.
        values = corr_matrix.to_numpy()

        # Convert to dict format
        correlations: dict[str, dict[str, float]] = {}
        for i, col1 in enumerate(labels):
            row: dict[str, float] = {}
            for j, col2 in enumerate(labels):
                value = values[i, j]
                if pd.isna(value):
                    continue
                if min_correlation is None or i == j or abs(value) >= min_correlation:
                    row[col2] = round(float(value), 4)
            correlations[col1] = row

        # Find highly correlated pairs (upper triangle only, so each pair appears once)
        high_correlations = []
        for i, col1 in enumerate(labels):
            for j in range(i + 1, len(labels)):
                corr_value = values[i, j]
                if not pd.isna(corr_value) and abs(corr_value) >= high_threshold:
                    high_correlations.append(
                        {
                            "column1": col1,
                            "column2": labels[j],
                            "correlation": round(float(corr_value), 4),
                        }
                    )

        high_correlations.sort(key=lambda x: abs(x["correlation"]), reverse=True)

        session.record_operation(
            OperationType.ANALYZE,
            {"type": "correlation", "method": method, "columns": labels},
        )

        return {
            "success": True,
            "method": method,
            "correlation_matrix": correlations,
            "high_correlations": high_correlations,
            "columns_analyzed": labels,
        }

    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        logger.error(f"Error calculating correlation: {e!s}")
        return {"success": False, "error": str(e)}

# - src/csv_editor/server.py:317-326 (registration) MCP tool registration: decorated with @mcp.tool, exposing get_correlation_matrix to clients and delegating to the handler.
# Registration shim: exposes the analytics handler as an MCP tool.
@mcp.tool
async def get_correlation_matrix(
    session_id: str,
    method: str = "pearson",
    columns: list[str] | None = None,
    min_correlation: float | None = None,
    ctx: Context = None,
) -> dict[str, Any]:
    """Calculate correlation matrix for numeric columns."""
    # Forward every argument unchanged; the handler does all the work.
    result = await _get_correlation_matrix(
        session_id=session_id,
        method=method,
        columns=columns,
        min_correlation=min_correlation,
        ctx=ctx,
    )
    return result

# - src/csv_editor/server.py:291-291 (helper) Import alias that connects the registration in server.py to the handler in tools/analytics.py.
from .tools.analytics import get_correlation_matrix as _get_correlation_matrix