Skip to main content
Glama

Excel MCP Server

by Aifornorms
server.py29.6 kB
import logging import sys import os from os import path from typing import Any, List, Dict, Optional from mcp.server.fastmcp import FastMCP, Context import pandas as pd import matplotlib.pyplot as plt from .data_handlers import ExcelDataHandler as ExcelHandler os.environ["MODIN_ENGINE"] = "dask" # Configure logging logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", handlers=[logging.StreamHandler(sys.stdout), logging.FileHandler("excel-mcp.log")], force=True, ) logger = logging.getLogger("excel-mcp") # Get Excel files path from environment or use default EXCEL_FILES_PATH = os.environ.get("EXCEL_FILES_PATH", "./excel_files") # Create the directory if it doesn't exist os.makedirs(EXCEL_FILES_PATH, exist_ok=True) # Initialize FastMCP server mcp = FastMCP( "excel-mcp", version="0.1.0", description="MCP server for Excel and CSV file operations, file paths should use relative addresses", ) @mcp.tool() def list_worksheets(filepath: str) -> List[str]: """Get all worksheet names from the specified Excel or CSV file. Args: filepath: Relative or absolute path to the target file Returns: List[str]: List containing all worksheet names Raises: FileNotFoundError: If the specified file path does not exist ValueError: If the file format is invalid or not an Excel/CSV file """ excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: sheet_names = excel_handler.get_sheet_names(filepath) return f"Total {len(sheet_names)}\n" + "\n".join(sheet_names) except Exception as e: logger.error(f"Error getting sheet names: {e}") raise @mcp.tool() def list_columns(filepath: str, sheet_name: str) -> str: """Get all column names and their data types from the specified worksheet in the file. Args: filepath: Relative or absolute path to the target file sheet_name: Name of the worksheet to get column names from (for CSV files, this parameter will be ignored) Returns: str: Formatted string containing column names and data types Raises: FileNotFoundError: If the specified file path does not exist ValueError: If the specified worksheet name does not exist """ excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: df = excel_handler.read_data( excel_handler.get_file_path(filepath), sheet_name=sheet_name ) columns = df.columns.tolist() dtypes = df.dtypes # Calculate maximum column name length for alignment max_col_len = max(len(str(col)) for col in columns) if columns else 0 max_col_len = max(max_col_len, 10) # Minimum width is 10 # Generate formatted table output header = f"Total {len(columns)} columns\n" + "-" * (max_col_len + 10) + "\n" header += f"{'Column Name'.ljust(max_col_len)} Type\n" header += "-" * (max_col_len + 10) + "\n" rows = [f"{str(col).ljust(max_col_len)} {dtypes[col]}" for col in columns] return header + "\n".join(rows) except Exception as e: logger.error(f"Error getting Excel columns: {e}") raise @mcp.tool() def get_random_sample(filepath: str, sheet_name: str, sample_size: int) -> str: """Get random sample data from Excel or CSV file. Args: filepath: Relative or absolute path to the target file sheet_name: Name of the worksheet to sample from (for CSV files, this parameter will be ignored) sample_size: Number of rows to sample Returns: str: String representation of the random sample data Raises: FileNotFoundError: If the specified file path does not exist ValueError: Thrown when sample size is greater than dataset size """ excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: df = excel_handler.read_data( excel_handler.get_file_path(filepath), sheet_name=sheet_name ) sample_df = excel_handler.get_random_sample( df, sample_size, sheet_name=sheet_name ) return sample_df.to_json(orient="records", force_ascii=False) except Exception as e: logger.error(f"Error getting random sample: {e}") raise @mcp.tool() def analyze_missing_values(filepath: str, sheet_name: str) -> str: """Get missing data information from Excel or CSV file. Args: filepath: Relative or absolute path to the target file sheet_name: Name of the worksheet to analyze (for CSV files, this parameter will be ignored) Returns: str: Detailed statistical information containing the number and rate of missing values for each column Raises: FileNotFoundError: If the specified file path does not exist ValueError: If the worksheet does not exist or the file format is invalid """ excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: df = excel_handler.read_data( excel_handler.get_file_path(filepath), sheet_name=sheet_name ) return excel_handler.get_missing_values_info(df) except Exception as e: logger.error(f"Error getting Excel sheet missing values info: {e}") raise @mcp.tool() def analyze_unique_values( filepath: str, sheet_name: str, max_unique: int = 10, ) -> Dict[str, Any]: """Get the distribution of unique values for specified columns in an Excel or CSV file. Args: filepath: Relative or absolute path to the target file sheet_name: Name of the worksheet to analyze (for CSV files, this parameter will be ignored) max_unique: Maximum number of unique values to display for each column, beyond this number only statistics are shown Returns: Dict[str, Any]: Dictionary containing detailed information about the distribution of unique values for each column Raises: FileNotFoundError: If the specified file path does not exist ValueError: If the worksheet does not exist or the specified column name does not exist """ excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: df = excel_handler.read_data( excel_handler.get_file_path(filepath), sheet_name=sheet_name ) return excel_handler.get_data_unique_values( df, columns=None, max_unique=max_unique ) except Exception as e: logger.error(f"Error getting Excel sheet unique values: {e}") raise @mcp.tool() def analyze_data_overview(filepath: str, sheet_name: str) -> str: """Data analysis first choice: Get a complete data overview of the worksheet. Perform comprehensive data analysis, including data type statistics, missing value analysis, non-null value counts and other key metric checks. Args: filepath: Relative or absolute path to the target file sheet_name: Name of the worksheet to analyze (for CSV files, this parameter will be ignored) Returns: str: Detailed information string containing worksheet data analysis results Raises: FileNotFoundError: If the specified file path does not exist ValueError: If the worksheet does not exist or the file format is invalid """ excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: # Read data df = excel_handler.read_data( excel_handler.get_file_path(filepath), sheet_name=sheet_name ) # Get row and column data num_rows, num_cols = df.shape # Get missing values information missing_values_info = excel_handler.get_missing_values_info(df) # Convert data type information to string format dtypes_str = "\n".join( [f" {col}: {dtype}" for col, dtype in df.dtypes.items()] ) # Build the final result string result = f""" Data Analysis Results: 1. Data Scale: Total Rows: {num_rows} Total Columns: {num_cols} 2. Data Types: {dtypes_str} """ return result except Exception as e: error_msg = f"Error inspecting Excel sheet data: {e}" logger.error(error_msg) return error_msg @mcp.tool() def analyze_correlations( filepath: str, sheet_name: str, method: str = "pearson", min_correlation: float = 0.5, ) -> str: """Get correlations between columns in an Excel or CSV file. Args: filepath: Relative or absolute path to the target file sheet_name: Name of the worksheet to analyze (for CSV files, this parameter will be ignored) method: Correlation analysis method, supports 'pearson', 'spearman', 'kendall' min_correlation: Correlation coefficient threshold, only returns results where the absolute value of the correlation coefficient is greater than this value Returns: str: Detailed result string containing column correlation analysis Raises: FileNotFoundError: If the specified file path does not exist ValueError: If the worksheet does not exist, invalid correlation calculation method, or data types are not suitable for correlation calculation """ excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: sheet_names = excel_handler.get_sheet_names(filepath) if sheet_name not in sheet_names: raise ValueError(f"Sheet '{sheet_name}' not found. Available sheets: {sheet_names}") df = excel_handler.read_data( excel_handler.get_file_path(filepath), sheet_name=sheet_name ) return excel_handler.get_column_correlation(df, method, min_correlation) except FileNotFoundError as e: logger.error(f"File not found: {filepath}") return f"Error: File not found: {filepath}" except ValueError as e: logger.error(f"Invalid sheet name or file format: {e}") return f"Error: {str(e)}" except Exception as e: logger.error(f"Unexpected error: {e}") return f"Error: {str(e)}" @mcp.tool() def analyze_numeric_stats( filepath: str, sheet_name: str, columns: List[str] ) -> Dict[str, Any]: """Get statistical information for numeric columns, including mean, median, standard deviation, quantiles, etc. Args: filepath: Source file path sheet_name: Worksheet name (for CSV files, this parameter will be ignored) columns: List of column names to analyze, defaults to all numeric columns, at least one column. All columns must be numeric type. Maximum of 10 columns. Returns: Dict[str, Any]: Dictionary containing statistical information for each numeric column """ excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: df = excel_handler.read_data( excel_handler.get_file_path(filepath), sheet_name=sheet_name ) numerical_cols = ( df.select_dtypes(include=["int64", "float64"]).columns if columns is None else columns ) # Get basic statistical information stats = df[numerical_cols].describe() # Calculate the sum of numeric columns sums = df[numerical_cols].sum(numeric_only=True, skipna=True) stats.loc["sum"] = sums return stats.to_json(orient="records", force_ascii=False) except Exception as e: logger.error(f"Error calculating statistics: {e}") raise @mcp.tool() def analyze_group_stats( filepath: str, sheet_name: str, group_by: str, agg_columns: List[str], agg_functions: List[str] = ["mean", "count"], ) -> str: """Group by specified column and calculate statistics. Args: filepath: Source file path sheet_name: Worksheet name (for CSV files, this parameter will be ignored) group_by: Column name to group by agg_columns: List of column names to calculate statistics for agg_functions: List of statistical functions, supports 'mean', 'sum', 'count', 'min', 'max', etc. Returns: str: String representation of group statistics results """ excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: df = excel_handler.read_data( excel_handler.get_file_path(filepath), sheet_name=sheet_name ) grouped = df.groupby(group_by)[agg_columns].agg(agg_functions) # Sort by the result of the first aggregation function of the first statistical column in descending order first_col = agg_columns[0] first_func = agg_functions[0] sort_col = ( (first_col, first_func) if isinstance(grouped.columns, pd.MultiIndex) else first_col ) sorted_grouped = grouped.sort_values(by=sort_col, ascending=False) return sorted_grouped.to_string() except Exception as e: logger.error(f"Error in group statistics: {e}") raise @mcp.tool() def analyze_time_series( filepath: str, sheet_name: str, date_column: str, value_column: str, freq: str = "M" ) -> str: """Analyze time series data, including trends, seasonality, etc. Args: filepath: Source file path sheet_name: Worksheet name (for CSV files, this parameter will be ignored) date_column: Date column name value_column: Value column name freq: Resampling frequency, such as 'D' (day), 'M' (month), 'Y' (year) Returns: str: Time series analysis results """ excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: df = excel_handler.read_data( excel_handler.get_file_path(filepath), sheet_name=sheet_name ) df[date_column] = pd.to_datetime(df[date_column]) df = df.set_index(date_column) # Resample and calculate statistics resampled = df[value_column].resample(freq).agg(["mean", "min", "max", "count"]) return f"Time series analysis results (frequency: {freq}):\n{resampled.to_string()}" except Exception as e: logger.error(f"Error in time series analysis: {e}") raise @mcp.tool() def analyze_data(filepath: str, sheet_name: str, python_code: str) -> str: """ Data analysis tool for executing Python code and capturing output. This tool is designed for data exploration and debugging, allowing analysis of DataFrame data through custom Python code execution. Output from all print statements will be captured and returned, making it easy to view intermediate calculation results. Usage limitations: Only for data analysis; cannot modify source files or generate new files. Chart drawing functionality is not supported (please use visualize_data). Code execution results are for display only and cannot be called by other functions. Recommended to optimize print output and avoid lengthy data printing. Critical Requirement: The Python code must include import pandas as pd inside the main function to use pandas functionalities. Placing import pandas as pd outside the main function is incorrect and will raise a ValueError. This ensures portability and compatibility with the execution environment. Filepath Handling: The filepath is resolved relative to the environment variable EXCEL_FILES_PATH (default: ./excel_files). For example, if filepath is data.csv, the tool looks for the file at ./excel_files/data.csv. Absolute paths are also supported but must be valid. Args: filepath: Path to the Excel or CSV file, either relative to EXCEL_FILES_PATH (e.g., data.csv) or absolute (e.g., /path/to/data.csv). The file must exist. sheet_name: Name of the worksheet to process (for CSV files, this parameter will be ignored). python_code: Python code containing a main function that takes a DataFrame parameter (df). Mandatory: The main function must include import pandas as pd as its first line to use pandas functionalities. Prohibited: Do not place import pandas as pd or any other imports outside the main function. The code must be a pure function, using only the input DataFrame (df) and avoiding side effects like reading or writing files. The df is provided by the tool, loaded from the specified filepath. Returns: str: String containing all print outputs and the return value of the main function. Raises: Taxes: ValueError: If the Python code has a format error, lacks a main function, places import pandas as pd outside the main function, omits import pandas as pd inside the main function, includes side effects, or if the filepath cannot be resolved (e.g., file not found). TypeError: If the main function’s return value type is invalid. Examples: Correct Example: Filepath: data.csv (resolves to ./excel_files/data.csv) Python Code: def main(df): import pandas as pd # Display first 5 rows of data print(df.head()) return "Data analysis completed" Incorrect Example (Will Raise ValueError): Filepath: data.csv Python Code: import pandas as pd # Wrong: Import outside main() def main(df): print(df.head()) return "Data analysis completed" Incorrect Filepath Example (Will Raise ValueError): Filepath: nonexistent.csv (file not found at ./excel_files/nonexistent.csv) """ # Initialize Excel handler excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: return excel_handler.run_code_only_log( filepath, python_code, sheet_name=sheet_name ) except Exception as e: logger.error(f"Error processing Excel file: {e}") raise @mcp.tool() def save_transformed_data( filepath: str, sheet_name: str, python_code: str, result_file_path: str, default_sheet_name: str = "Sheet1", ) -> str: """Execute Python code to generate Excel or CSV file data, supporting single or multiple sheet processing. This tool executes Python code to transform DataFrame data and save the results to an Excel or CSV file. The code must be a pure function, avoiding side effects such as reading external files, modifying external files, or generating visualizations. The input DataFrame ('df') represents the data from the specified filepath and sheet_name. The execution environment provides the following pre-defined variable: 'pd': The pandas library Critical Requirement: To ensure portability and compatibility, the python_code must include import pandas as pd inside the main function. Placing import pandas as pd outside the main function is incorrect and will raise a ValueError. This applies even though the pd variable is available in the environment. Args: filepath: Source file path sheet_name: Source worksheet name (for CSV files, this parameter will be ignored) python_code: Python code containing a main function that takes a DataFrame parameter and returns a pandas.DataFrame or Dict[str, DataFrame]. Mandatory: The main function must include import pandas as pd as its first line to use pandas functionalities. Prohibited: Do not place import pandas as pd or any other imports outside the main function. The main function must only use the input DataFrame ('df') and avoid reading external files or generating side effects. When returning a DataFrame, data will be saved to the worksheet specified by default_sheet_name. When returning a Dict[str, DataFrame], dictionary keys are worksheet names and values are corresponding DataFrames. result_file_path: Path to save the result file default_sheet_name: Default worksheet name, used when python_code returns a single DataFrame Returns: str: Execution result information, including information about generated worksheets Raises: ValueError: If the Python code has a format error, lacks a main function, places import pandas as pd outside the main function, returns an invalid type, or includes side effects (e.g., reading external files). TypeError: If the main function's return value is neither a DataFrame nor a Dict[str, DataFrame]. Examples: Correct Example (Single DataFrame): def main(df): import pandas as pd return df.groupby('category').sum() Incorrect Example (Will Raise ValueError): import pandas as pd # Wrong: Import outside main() def main(df): return df.groupby('category').sum() Correct Example (Multiple Sheets): def main(df): import pandas as pd result = {} for category in df['category'].unique(): result[f"Sheet_{category}"] = df[df['category'] == category][['value']].copy() return result """ # Initialize handlers excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: return excel_handler.run_code( filepath, python_code, sheet_name=sheet_name, result_file_path=result_file_path, result_sheet_name=default_sheet_name, ) except Exception as e: logger.error(f"Error executing Excel code: {e}") raise @mcp.tool() def plot_matplotlib_chart( filepath: str, sheet_name: str, save_path: str, python_code: str, ) -> str: """Dedicated function for creating visualization charts from Excel or CSV data. Args: filepath: Source file path sheet_name: Worksheet name (for CSV files, this parameter will be ignored) save_path: Path to save the chart python_code: Python code to execute, defined as def main(df, plt), can use matplotlib for visualization, returns plt object, no need to save Returns: str: Execution result information, please provide the relative path of the result file to the user Raises: ValueError: When chart type is not supported or data column does not exist FileNotFoundError: When file does not exist """ excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: return excel_handler.run_code_with_plot( filepath, python_code, save_path, sheet_name=sheet_name ) except Exception as e: logger.error(f"Error generating chart: {e}") raise @mcp.tool() def plot_pyecharts_chart( filepath: str, sheet_name: str, save_path: str, python_code: str, theme: str = "light", title: str = None, ) -> str: """Dedicated function for generating a single interactive chart using Pyecharts. This tool executes Python code to create a single interactive chart using Pyecharts. The code must define a main(df) function that returns a single Pyecharts chart object (e.g., Bar, Line, Pie). The chart is saved as an HTML file. Critical Requirement: The Python code must include `import pandas as pd` and Pyecharts imports (e.g., `from pyecharts.charts import Bar`) inside the main function to ensure portability. Imports outside the main function will raise a ValueError. Args: filepath: Relative or absolute path to the Excel or CSV file sheet_name: Worksheet name (for CSV files, this parameter is ignored) save_path: Path to save the chart, must end with .html python_code: Python code to execute, defined as def main(df), returns a single Pyecharts chart object theme: Pyecharts theme (e.g., 'light', 'dark', 'chalk', 'vintage'), defaults to 'light' title: Optional title for the chart, displayed in the HTML page Returns: str: Execution result information, including the generated HTML file path Raises: ValueError: If save_path does not end with .html, code lacks main function, or includes invalid imports FileNotFoundError: If the input file does not exist TypeError: If the return value is not a single Pyecharts chart object Example: ```python def main(df): import pandas as pd from pyecharts.charts import Bar from pyecharts import options as opts bar = ( Bar() .add_xaxis(df['category'].tolist()) .add_yaxis("Values", df['value'].tolist()) .set_global_opts(title_opts=opts.TitleOpts(title="Bar Chart")) ) return bar""" excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: return excel_handler.run_code_with_pyecharts( filepath, python_code, save_path, theme=theme, title=title, sheet_name=sheet_name ) except Exception as e: logger.error(f"Error generating Pyecharts chart: {e}") raise @mcp.tool() def plot_pyecharts_dashboard( filepath: str, sheet_name: str, save_path: str, python_code: str, theme: str = "light", title: str = None, # NEW: Added title parameter ) -> str: """Dedicated function for generating interactive Pyecharts dashboards with multiple charts. This tool executes Python code to create a dashboard with multiple charts arranged using Pyecharts Page, Grid, or Tab layouts. The code must define a main(df) function that returns a Pyecharts Page, Grid, or Tab object. The dashboard is saved as an HTML file. Critical Requirement: The Python code must include `import pandas as pd` and Pyecharts imports (e.g., `from pyecharts.charts import Bar`) inside the main function to ensure portability. Imports outside the main function will raise a ValueError. Args: filepath: Relative or absolute path to the Excel or CSV file sheet_name: Worksheet name (for CSV files, this parameter is ignored) save_path: Path to save the dashboard, must end with .html python_code: Python code to execute, defined as def main(df), returns a Pyecharts Page, Grid, or Tab object theme: Pyecharts theme (e.g., 'light', 'dark', 'chalk', 'vintage'), defaults to 'light' Returns: str: Execution result information, including the generated HTML file path Raises: ValueError: If save_path does not end with .html, code lacks main function, or includes invalid imports FileNotFoundError: If the input file does not exist TypeError: If the return value is not a Pyecharts Page, Grid, or Tab object Example: ```python def main(df): import pandas as pd from pyecharts.charts import Bar, Line from pyecharts import options as opts from pyecharts.charts import Page page = Page(layout=Page.SimplePageLayout) bar = ( Bar() .add_xaxis(df['category'].tolist()) .add_yaxis("Values", df['value'].tolist()) .set_global_opts(title_opts=opts.TitleOpts(title="Bar Chart")) ) line = ( Line() .add_xaxis(df['category'].tolist()) .add_yaxis("Values", df['value'].tolist()) .set_global_opts(title_opts=opts.TitleOpts(title="Line Chart")) ) page.add(bar, line) return page """ excel_handler = ExcelHandler(path.join(EXCEL_FILES_PATH, "")) try: return excel_handler.run_code_with_pyecharts_dashboard( filepath, python_code, save_path, theme=theme, title=title, sheet_name=sheet_name ) except Exception as e: logger.error(f"Error generating Pyecharts dashboard: {e}") raise async def run_server(): """Start Excel and CSV file processing MCP server.""" try: if not os.path.exists(EXCEL_FILES_PATH): logger.error(f"EXCEL_FILES_PATH directory does not exist: {EXCEL_FILES_PATH}") raise ValueError(f"EXCEL_FILES_PATH directory does not exist: {EXCEL_FILES_PATH}") logger.info(f"Starting Excel/CSV MCP server (files directory: {EXCEL_FILES_PATH})") await mcp.run_sse_async() except KeyboardInterrupt: logger.info("Server stopped by user") await mcp.shutdown() except Exception as e: logger.error(f"Server failed: {e}") raise finally: logger.info("Server shutdown complete")

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Aifornorms/ExcelMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server