Vibe Preprocessing and Analysis MCP Server

main.py•19.6 kB

# server.py import os import json import pandas as pd from mcp.server.fastmcp import FastMCP from utils import standardize_mixed_type_columns, save_plot_to_claude_graphs, save_df_to_work_dir import matplotlib.pyplot as plt import numpy as np import seaborn as sns import base64 import io # Create an MCP server mcp = FastMCP("csv-xlsx-py", dependencies=["pandas", "matplotlib", "seaborn"]) df = [] @mcp.tool() def send_work_dir() -> str: """Retrieve the current value of the WORK_DIR environment variable.""" try: work_dir = os.environ.get("WORK_DIR") if not work_dir: return json.dumps({ "success": False, "message": "WORK_DIR environment variable is not set." }) return json.dumps({ "success": True, "message": "WORK_DIR found.", "data": work_dir }) except Exception as e: return json.dumps({ "success": False, "message": f"Unexpected error: {str(e)}" }) @mcp.tool() def set_work_dir(new_work_dir: str) -> str: """Set a new value for the WORK_DIR environment variable.""" try: if not new_work_dir: return json.dumps({ "success": False, "message": "New work directory cannot be empty." }) os.environ["WORK_DIR"] = new_work_dir return json.dumps({ "success": True, "message": f"WORK_DIR has been set to: {new_work_dir}" }) except Exception as e: return json.dumps({ "success": False, "message": f"Unexpected error: {str(e)}" }) @mcp.tool() def list_work_dir_files() -> str: """List all files and directories in the current WORK_DIR.""" try: work_dir = os.environ.get("WORK_DIR") if not work_dir: return json.dumps({ "success": False, "message": "WORK_DIR environment variable is not set." }) if not os.path.exists(work_dir): return json.dumps({ "success": False, "message": f"WORK_DIR path does not exist: {work_dir}" }) files = os.listdir(work_dir) return json.dumps({ "success": True, "message": f"Found {len(files)} items in {work_dir}.", "data": files }) except Exception as e: return json.dumps({ "success": False, "message": f"Unexpected error: {str(e)}" }) @mcp.tool() def load_csv(filename: str) -> str: """Load a CSV file from WORK_DIR into global df and return the first 5 rows.""" global df work_dir = os.environ.get("WORK_DIR") if not work_dir: return json.dumps({ "success": False, "message": "WORK_DIR environment variable is not set." }) file_path = os.path.join(work_dir, filename) if not os.path.exists(file_path): return json.dumps({ "success": False, "message": f"CSV file not found at: {file_path}" }) try: df = pd.read_csv(file_path) preview = df.head().to_dict(orient="records") return json.dumps({ "success": True, "message": f"CSV loaded successfully from {file_path}.", "data": f"The first 5 rows of the file are: \n{preview}" }) except Exception as e: return json.dumps({ "success": False, "message": f"Error loading CSV: {str(e)}" }) @mcp.tool() def handle_column_mixed_types() -> str: """Finds and handles any columns with mixed datatypes.""" global df if isinstance(df, list): return json.dumps({ "success": False, "message": "No dataframe is loaded in the system!" }) if df.empty: return json.dumps({ "success": False, "message": "The dataframe is loaded in the system is empty!" }) try: df, mixed_type_columns = standardize_mixed_type_columns(df) return json.dumps({ "success": True, "mixed_type_columns": f"These were the mixed type columns that were found: {mixed_type_columns}", "message": f"The mixed type columns were handles successfully!" }) except Exception as e: return json.dumps({ "success": False, "message": f"Error loading CSV: {str(e)}" }) @mcp.tool() def describe_df() -> str: """Return pandas describe() summary for the global dataframe.""" global df if isinstance(df, list): return json.dumps({ "success": False, "message": "No dataframe is loaded in the system!" }) if df.empty: return json.dumps({ "success": False, "message": "The dataframe is loaded in the system is empty!" }) try: description = df.describe(include='all').to_dict() return json.dumps({ "success": True, "message": "DataFrame summary using describe()", "data": description }, indent=2) except Exception as e: return json.dumps({ "success": False, "message": f"Error describing dataframe: {str(e)}" }) @mcp.tool() def generate_correlation_matrix() -> str: """Generate correlation matrix heatmap and return/save both matrix and image.""" global df if isinstance(df, list): return json.dumps({ "success": False, "message": "No dataframe is loaded in the system!" }) try: # Compute correlation matrix for numeric columns corr = df.corr(numeric_only=True) # Plot heatmap fig, ax = plt.subplots(figsize=(10, 8)) sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", square=True, ax=ax) ax.set_title("Correlation Matrix") # Save figure filename = "correlation_matrix.png" work_dir = os.environ.get("WORK_DIR") save_path = save_plot_to_claude_graphs(fig, filename, work_dir) # Convert matrix to nested dict for JSON corr_dict = corr.round(3).to_dict() return json.dumps({ "success": True, "message": f"Correlation matrix saved to {save_path}", "data": corr_dict, "imagePath": save_path }, indent=2) except Exception as e: return json.dumps({ "success": False, "message": f"Error generating correlation matrix: {str(e)}" }) @mcp.tool() def plot_graph( graph_type: str, x_column: str, y_column: str = None, output_filename: str = "graph.png" ) -> str: """ Plot a graph using seaborn/matplotlib based on the loaded DataFrame and save it to 'claude_graphs/'. Parameters: ---------- graph_type : str The type of graph to generate. Supported values: - 'line': Line plot - 'bar': Bar chart - 'scatter': Scatter plot - 'hist': Histogram with KDE - 'box': Box plot - 'violin': Violin plot - 'pie': Pie chart (only x_column used, for category frequencies) - 'count': Count plot (like bar chart for categorical frequency) - 'kde': Kernel Density Estimation plot x_column : str The column from the DataFrame to be used as the x-axis. y_column : str, optional (default = None) The column to be used for the y-axis. Some plots like 'pie' and 'count' ignore this. output_filename : str, optional (default = 'graph.png') The filename for the saved graph image inside the 'claude_graphs/' directory. """ global df if isinstance(df, list): return json.dumps({ "success": False, "message": "No dataframe is loaded in the system!" }) if x_column not in df.columns or (y_column and y_column not in df.columns): return json.dumps({ "success": False, "message": f"One or both columns not found in the dataframe: {x_column}, {y_column}" }) try: sns.set_theme(style="whitegrid") fig, ax = plt.subplots(figsize=(10, 6)) if graph_type == "line": sns.lineplot(data=df, x=x_column, y=y_column, ax=ax) elif graph_type == "bar": sns.barplot(data=df, x=x_column, y=y_column, ax=ax) elif graph_type == "scatter": sns.scatterplot(data=df, x=x_column, y=y_column, ax=ax) elif graph_type == "hist": sns.histplot(data=df, x=x_column, ax=ax, kde=True) elif graph_type == "box": sns.boxplot(data=df, x=x_column, y=y_column, ax=ax) elif graph_type == "violin": sns.violinplot(data=df, x=x_column, y=y_column, ax=ax) elif graph_type == "pie": pie_data = df[x_column].value_counts() ax.pie(pie_data, labels=pie_data.index, autopct='%1.1f%%', startangle=90) ax.axis('equal') elif graph_type == "count": sns.countplot(data=df, x=x_column, ax=ax) elif graph_type == "kde": sns.kdeplot(data=df, x=x_column, ax=ax, fill=True) else: return json.dumps({ "success": False, "message": f"""Unsupported graph type: {graph_type}. You can use only these types: \n line, bar, scatter, hist, box, violin, pie, count, kde. If further graph types are needed to be supported please as admin to add them. """ }) ax.set_title(f"{graph_type.capitalize()} Plot: {x_column}" + (f" vs {y_column}" if y_column else "")) work_dir = os.environ.get("WORK_DIR") save_path = save_plot_to_claude_graphs(fig, output_filename, work_dir) return json.dumps({ "success": True, "message": f"Graph saved to {save_path}", "imagePath": save_path }) except Exception as e: return json.dumps({ "success": False, "message": f"Error generating plot: {str(e)}" }) @mcp.tool() def run_custom_graph_code(code: str) -> str: """ Run custom Python code provided by Claude to generate a single graph using global df. 📌 Claude Instructions: ----------------------- ✅ Use the global `df` (do not load or define it again) ✅ Create **only one graph at a time** ✅ Generate a matplotlib figure: `fig, ax = plt.subplots()` ✅ Define `output_filename` as the name of the graph file (e.g., 'myplot.png') ✅ Call: `save_path = save_plot_to_claude_graphs(fig, output_filename, work_dir)` ❌ Do NOT manually call plt.savefig() ❌ Do NOT skip assigning `save_path` Your job is to just create the graph and assign `save_path`. This tool handles execution and confirmation. Parameters: ----------- code : str Valid Python code string that uses the global `df`, defines: - `fig` - `output_filename` - `save_path = save_plot_to_claude_graphs(...)` Returns: -------- JSON string with: - success: Whether the execution was successful - message: Description of outcome - imagePath: Path where the image was saved (if successful) """ global df if isinstance(df, list): return json.dumps({ "success": False, "message": "No dataframe is loaded in the system!" }) try: exec_context = { "df": df, "os": os, "save_plot_to_claude_graphs": save_plot_to_claude_graphs, "plt": plt, "sns": sns, "pd": pd, "work_dir": os.environ.get("WORK_DIR") } exec(code, exec_context) save_path = exec_context.get("save_path") if not save_path or not os.path.exists(save_path): return json.dumps({ "success": False, "message": "Code executed but save_path is missing or the file does not exist. Make sure to call save_plot_to_claude_graphs(...) and assign it to save_path." }) return json.dumps({ "success": True, "message": "Code executed and graph saved successfully.", "imagePath": save_path }) except Exception as e: return json.dumps({ "success": False, "message": f"Error while executing custom code: {str(e)}" }) @mcp.tool() def handle_null_values(strategy: str, columns: list[str] = None) -> str: """ Handle null values in the loaded df using a given strategy and save the modified file. Parameters: ---------- strategy : str Strategy to handle nulls: - 'remove' : Drop rows with nulls in the selected columns - 'mean' : Fill with mean - 'median' : Fill with median - 'mode' : Fill with mode - 'ffill' : Forward fill - 'bfill' : Backward fill - 'constant:X' : Fill with constant value X columns : list[str], optional List of columns to apply the strategy on (default: all columns) Returns: ------- JSON string with: - success: True/False - message: Outcome summary - outputFileName: Name of the new modified file """ try: global df if columns is None or len(columns) == 0: columns = df.columns.tolist() for col in columns: if col not in df.columns: return json.dumps({ "success": False, "message": f"Column '{col}' not found in the file." }) if strategy == "remove": df.dropna(subset=[col], inplace=True) elif strategy == "mean": df[col].fillna(df[col].mean(), inplace=True) elif strategy == "median": df[col].fillna(df[col].median(), inplace=True) elif strategy == "mode": mode_val = df[col].mode() if not mode_val.empty: df[col].fillna(mode_val[0], inplace=True) elif strategy == "ffill": df[col].fillna(method="ffill", inplace=True) elif strategy == "bfill": df[col].fillna(method="bfill", inplace=True) elif strategy.startswith("constant:"): value = strategy.split("constant:")[1] df[col].fillna(value, inplace=True) else: return json.dumps({ "success": False, "message": f"Invalid strategy: {strategy}" }) return json.dumps({ "success": True, "message": f"The df has been modified successfully!", }) except Exception as e: return json.dumps({ "success": False, "message": f"Error: {str(e)}" }) @mcp.tool() def run_custom_df_edit_code(code: str) -> str: """ Run custom Python code from Claude to modify the global dataframe (`df`). 🧠 Claude Instructions: ----------------------- ✅ Use the global variable `df` — already loaded ✅ Modify `df` in-place or reassign `df = ...` ❌ Do NOT save the dataframe (saving is handled by a separate tool) ✅ Only modify one thing at a time — column drop, renaming, type conversion, etc. Parameters: ----------- code : str Python code that modifies the global `df`. Returns: -------- JSON string with: - success: True/False - message: Outcome summary """ global df if isinstance(df, list): return json.dumps({ "success": False, "message": "No dataframe is loaded in the system!" }) try: exec_context = { "df": df, "pd": pd, "os": os } exec(code, exec_context) # Reassign df if Claude changed it df = exec_context.get("df", df) return json.dumps({ "success": True, "message": "Code executed successfully. DataFrame updated." }) except Exception as e: return json.dumps({ "success": False, "message": f"Error executing code: {str(e)}" }) @mcp.tool() def save_global_df(filename: str) -> str: """ Save the current global DataFrame to a file in the WORK_DIR. Parameters: ----------- filename : str The file name to save (e.g., 'output.csv' or 'result.xlsx') """ global df if isinstance(df, list): return json.dumps({ "success": False, "message": "No dataframe is loaded to save." }) try: work_dir = os.environ.get("WORK_DIR") if not work_dir: raise EnvironmentError("WORK_DIR environment variable is not set.") save_path = save_df_to_work_dir(df, filename, work_dir) return json.dumps({ "success": True, "message": f"DataFrame saved successfully as {filename}.", "filePath": save_path }) except Exception as e: return json.dumps({ "success": False, "message": f"Error saving DataFrame: {str(e)}" }) @mcp.tool() def drop_columns(columns: list[str]) -> str: """ Drop specified columns from the global DataFrame. Parameters: ----------- columns : list[str] List of column names to drop from the DataFrame. """ global df if isinstance(df, list): return json.dumps({ "success": False, "message": "No dataframe is loaded in the system!" }) invalid_columns = [col for col in columns if col not in df.columns] if invalid_columns: return json.dumps({ "success": False, "message": f"Error: Columns not found: {', '.join(invalid_columns)}" }) try: df.drop(columns=columns, inplace=True) return json.dumps({ "success": True, "message": f"Columns dropped: {', '.join(columns)}" }) except Exception as e: return json.dumps({ "success": False, "message": f"Error dropping columns: {str(e)}" }) @mcp.tool() def rename_columns(column_mapping: dict) -> str: """ Rename columns in the global DataFrame. Parameters: ----------- column_mapping : dict Dictionary mapping old column names to new ones (e.g. {"old1": "new1"}). """ global df if isinstance(df, list): return json.dumps({ "success": False, "message": "No dataframe is loaded in the system!" }) invalid_columns = [col for col in column_mapping if col not in df.columns] if invalid_columns: return json.dumps({ "success": False, "message": f"Error: Columns not found: {', '.join(invalid_columns)}" }) try: df.rename(columns=column_mapping, inplace=True) return json.dumps({ "success": True, "message": f"Columns renamed: {column_mapping}" }) except Exception as e: return json.dumps({ "success": False, "message": f"Error renaming columns: {str(e)}" })

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mudit14224/Vibe-Data-Analysis'

If you have feedback or need assistance with the MCP directory API, please join our Discord server