create_distribution_plots
Generate distribution plots for numeric data columns to visualize patterns and outliers using histograms, box plots, violin plots, or KDE plots.
Instructions
Create distribution plots for numeric columns.
Args:
- file_path: Path to the data file
- output_path: Path where to save the distribution plots
- columns: Optional list of specific columns to plot (if None, uses all numeric columns)
- plot_type: Type of distribution plot (histogram, box, violin, kde)
Returns: Information about the created distribution plots
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| file_path | Yes | ||
| output_path | Yes | ||
| columns | No | ||
| plot_type | No | | histogram |
Implementation Reference
- src/visidata_mcp/server.py:768-876 (handler): Main handler function that creates distribution plots (histogram, box, violin, or kde) for numeric columns in a dataset. It loads the data, filters to numeric columns, creates a subplot grid based on the column count, generates the plots using matplotlib/seaborn, and saves the figure to the output path.
@mcp.tool()
def create_distribution_plots(file_path: str, output_path: str, columns: Optional[List[str]] = None, plot_type: str = "histogram") -> str:
    """
    Create distribution plots for numeric columns.

    The input file is loaded with pandas (reader chosen by file extension,
    falling back to CSV), reduced to its numeric columns, and each column is
    drawn on its own subplot. The resulting figure is written to
    ``output_path``.

    Args:
        file_path: Path to the data file
        output_path: Path where to save the distribution plots
        columns: Optional list of specific columns to plot (if None, uses all numeric columns)
        plot_type: Type of distribution plot (histogram, box, violin, kde)

    Returns:
        A JSON string describing the created plots on success, or a string
        starting with "Error" describing what went wrong. Exceptions are not
        propagated; they are reported in the returned string together with a
        traceback.
    """
    # Kept outside the try body so the finally clause can always release it.
    fig = None
    try:
        if not VISUALIZATION_AVAILABLE:
            return f"Error: {VISUALIZATION_ERROR}"

        import pandas as pd
        from pathlib import Path
        import math

        # Load the data; anything that is not JSON/Excel/TSV is read as CSV.
        file_extension = Path(file_path).suffix.lower()
        if file_extension == '.json':
            df = pd.read_json(file_path)
        elif file_extension in ('.xlsx', '.xls'):
            df = pd.read_excel(file_path)
        elif file_extension == '.tsv':
            df = pd.read_csv(file_path, sep='\t')
        else:
            df = pd.read_csv(file_path)

        # Select numeric columns
        if columns:
            missing_cols = [col for col in columns if col not in df.columns]
            if missing_cols:
                return f"Error: Columns not found: {missing_cols}"
            numeric_df = df[columns].select_dtypes(include=['number'])
        else:
            numeric_df = df.select_dtypes(include=['number'])

        if numeric_df.empty:
            return "Error: No numeric columns found for distribution analysis"

        # Validate the plot type before allocating a figure, so that a bad
        # value never leaves an open figure behind.
        if plot_type not in ("histogram", "box", "violin", "kde"):
            return f"Error: Unsupported plot type '{plot_type}'. Use: histogram, box, violin, kde"

        # Calculate subplot dimensions: a single row for up to four columns,
        # otherwise a three-wide grid.
        n_cols = len(numeric_df.columns)
        if n_cols <= 4:
            n_rows, n_plot_cols = 1, n_cols
        else:
            n_plot_cols = 3
            n_rows = math.ceil(n_cols / n_plot_cols)

        # squeeze=False always yields a 2-D array of axes, so a single
        # flatten covers the 1x1, 1xN and MxN layouts alike.
        fig, axes = plt.subplots(
            n_rows,
            n_plot_cols,
            figsize=(5 * n_plot_cols, 4 * n_rows),
            squeeze=False,
        )
        axes = axes.ravel()

        # Create distribution plots. items() iterates positionally, so each
        # subplot receives exactly one Series even if labels are duplicated.
        for ax, (column, series) in zip(axes, numeric_df.items()):
            values = series.dropna()

            if plot_type == "histogram":
                ax.hist(values, bins=20, alpha=0.7, edgecolor='black')
                ax.set_ylabel('Frequency')
            elif plot_type == "box":
                ax.boxplot(values)
                ax.set_ylabel('Value')
            elif plot_type == "violin":
                sns.violinplot(y=values, ax=ax)
            else:  # "kde" (plot_type was validated above)
                sns.kdeplot(data=values, ax=ax)
                ax.set_ylabel('Density')

            ax.set_title(f'{plot_type.title()} of {column}')
            # Column labels are not guaranteed to be strings (e.g. integer
            # headers), so convert before prettifying.
            ax.set_xlabel(str(column).replace('_', ' ').title())
            ax.grid(True, alpha=0.3)

        # Hide empty subplots left over in the last grid row.
        for unused_ax in axes[n_cols:]:
            unused_ax.set_visible(False)

        fig.tight_layout()
        fig.savefig(output_path, dpi=300, bbox_inches='tight')

        saved = Path(output_path)
        result = {
            "distribution_plots_created": True,
            "plot_type": plot_type,
            "columns_plotted": list(numeric_df.columns),
            "total_plots": len(numeric_df.columns),
            "output_file": output_path,
            "file_size": saved.stat().st_size if saved.exists() else 0
        }

        return json.dumps(result, indent=2)

    except Exception as e:
        return f"Error creating distribution plots: {str(e)}\n{traceback.format_exc()}"
    finally:
        # Release the figure on every exit path (success, early return, or
        # exception) so repeated tool calls do not accumulate open figures.
        if fig is not None:
            plt.close(fig)

# - src/visidata_mcp/server.py:768-768 (registration): Tool registration using the @mcp.tool() decorator, which exposes the create_distribution_plots function as an MCP tool.
@mcp.tool()