Skip to main content
Glama

create_distribution_plots

Generate distribution plots for numeric data columns to visualize patterns and outliers using histograms, box plots, violin plots, or KDE plots.

Instructions

Create distribution plots for numeric columns.

Args: file_path: Path to the data file; output_path: Path where to save the distribution plots; columns: Optional list of specific columns to plot (if None, uses all numeric columns); plot_type: Type of distribution plot (histogram, box, violin, kde)

Returns: Information about the created distribution plots

Input Schema

TableJSON Schema
| Name | Required | Description | Default |
| --- | --- | --- | --- |
| file_path | Yes | | |
| output_path | Yes | | |
| columns | No | | |
| plot_type | No | | histogram |

Implementation Reference

  • Main handler function that creates distribution plots (histogram, box, violin, or kde) for numeric columns in a dataset. Loads data, filters to numeric columns, creates subplot grid based on column count, generates plots using matplotlib/seaborn, and saves to output path.
    @mcp.tool()
    def create_distribution_plots(file_path: str, output_path: str,
                                columns: Optional[List[str]] = None,
                                plot_type: str = "histogram") -> str:
        """
        Create distribution plots for numeric columns.
        
        Args:
            file_path: Path to the data file (.csv, .json, .xlsx/.xls, .tsv;
                any other extension is read as CSV)
            output_path: Path where to save the distribution plots
            columns: Optional list of specific columns to plot (if None, uses all numeric columns).
                Non-numeric columns in the list are silently skipped.
            plot_type: Type of distribution plot (histogram, box, violin, kde)
        
        Returns:
            A JSON string describing the created plots on success, or a string
            starting with "Error" on failure. No exception is propagated to
            the caller.
        """
        # Tracked outside the try body so the finally clause can release the
        # figure on every exit path (early return, exception, or success).
        fig = None
        try:
            if not VISUALIZATION_AVAILABLE:
                return f"Error: {VISUALIZATION_ERROR}"
                
            import pandas as pd
            from pathlib import Path
            import math
            
            # Load the data; unknown extensions fall back to CSV parsing
            file_extension = Path(file_path).suffix.lower()
            if file_extension == '.csv':
                df = pd.read_csv(file_path)
            elif file_extension == '.json':
                df = pd.read_json(file_path)
            elif file_extension in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path)
            elif file_extension == '.tsv':
                df = pd.read_csv(file_path, sep='\t')
            else:
                df = pd.read_csv(file_path)
            
            # Select numeric columns
            if columns:
                missing_cols = [col for col in columns if col not in df.columns]
                if missing_cols:
                    return f"Error: Columns not found: {missing_cols}"
                numeric_df = df[columns].select_dtypes(include=['number'])
            else:
                numeric_df = df.select_dtypes(include=['number'])
            
            if numeric_df.empty:
                return "Error: No numeric columns found for distribution analysis"
            
            # Validate the plot type before allocating a figure, so an
            # unsupported type never leaves an open figure behind.
            if plot_type not in ("histogram", "box", "violin", "kde"):
                return f"Error: Unsupported plot type '{plot_type}'. Use: histogram, box, violin, kde"
            
            # Calculate subplot dimensions: a single row for up to four
            # columns, otherwise a three-wide grid.
            n_cols = len(numeric_df.columns)
            if n_cols <= 4:
                n_rows, n_plot_cols = 1, n_cols
            else:
                n_plot_cols = 3
                n_rows = math.ceil(n_cols / n_plot_cols)
            
            # squeeze=False always yields a 2-D array of axes, so a single
            # flatten() covers the 1x1, 1xN and MxN layouts uniformly.
            fig, axes = plt.subplots(n_rows, n_plot_cols,
                                     figsize=(5*n_plot_cols, 4*n_rows),
                                     squeeze=False)
            axes = axes.flatten()
            
            # Create distribution plots (zip stops after the last data column)
            for ax, column in zip(axes, numeric_df.columns):
                values = numeric_df[column].dropna()
                
                if plot_type == "histogram":
                    ax.hist(values, bins=20, alpha=0.7, edgecolor='black')
                    ax.set_ylabel('Frequency')
                elif plot_type == "box":
                    ax.boxplot(values)
                    ax.set_ylabel('Value')
                elif plot_type == "violin":
                    sns.violinplot(y=values, ax=ax)
                else:  # "kde" (plot_type was validated above)
                    sns.kdeplot(data=values, ax=ax)
                    ax.set_ylabel('Density')
                
                ax.set_title(f'{plot_type.title()} of {column}')
                # str() guards against non-string column labels (e.g. ints)
                ax.set_xlabel(str(column).replace('_', ' ').title())
                ax.grid(True, alpha=0.3)
            
            # Hide the unused trailing cells of the grid
            for ax in axes[n_cols:]:
                ax.set_visible(False)
            
            # Operate on this figure explicitly rather than on pyplot's
            # implicit "current figure".
            fig.tight_layout()
            fig.savefig(output_path, dpi=300, bbox_inches='tight')
            
            result = {
                "distribution_plots_created": True,
                "plot_type": plot_type,
                "columns_plotted": list(numeric_df.columns),
                "total_plots": len(numeric_df.columns),
                "output_file": output_path,
                "file_size": Path(output_path).stat().st_size if Path(output_path).exists() else 0
            }
            
            return json.dumps(result, indent=2)
            
        except Exception as e:
            return f"Error creating distribution plots: {str(e)}\n{traceback.format_exc()}"
        finally:
            # Release the figure even when plotting or saving failed;
            # otherwise figures accumulate in a long-running server process.
            if fig is not None:
                plt.close(fig)
  • Tool registration using @mcp.tool() decorator that exposes the create_distribution_plots function as an MCP tool.
    @mcp.tool()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/moeloubani/visidata-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.