read_data_from_excel
Extract structured data from Excel worksheets, including cell values, addresses, and validation rules, in JSON format. Specify filepath, sheet name, and optional cell range for precise data retrieval.
Instructions
Read data from Excel worksheet with cell metadata including validation rules.
Args:
filepath: Path to Excel file
sheet_name: Name of worksheet
start_cell: Starting cell (default A1)
end_cell: Ending cell (optional, auto-expands if not provided)
preview_only: Whether to return preview only
Returns:
JSON string containing structured cell data with validation metadata.
Each cell includes: address, value, row, column, and validation info (if any).
Input Schema
TableJSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| end_cell | No | ||
| filepath | Yes | ||
| preview_only | No | ||
| sheet_name | Yes | ||
| start_cell | No | A1 |
Implementation Reference
- src/excel_mcp/server.py:195-236 (handler)Main tool handler decorated with @mcp.tool(). Parses arguments, resolves file path, calls the core read_excel_range_with_metadata helper, and returns JSON-formatted result.@mcp.tool() def read_data_from_excel( filepath: str, sheet_name: str, start_cell: str = "A1", end_cell: Optional[str] = None, preview_only: bool = False ) -> str: """ Read data from Excel worksheet with cell metadata including validation rules. Args: filepath: Path to Excel file sheet_name: Name of worksheet start_cell: Starting cell (default A1) end_cell: Ending cell (optional, auto-expands if not provided) preview_only: Whether to return preview only Returns: JSON string containing structured cell data with validation metadata. Each cell includes: address, value, row, column, and validation info (if any). """ try: full_path = get_excel_path(filepath) from excel_mcp.data import read_excel_range_with_metadata result = read_excel_range_with_metadata( full_path, sheet_name, start_cell, end_cell ) if not result or not result.get("cells"): return "No data found in specified range" # Return as formatted JSON string import json return json.dumps(result, indent=2, default=str) except Exception as e: logger.error(f"Error reading data: {e}") raise
- src/excel_mcp/data.py:170-280 (helper)Core helper function that loads the workbook using openpyxl, parses the cell range, extracts cell values and metadata (including data validation rules), and returns a structured dictionary of cell data used by the handler.def read_excel_range_with_metadata( filepath: Path | str, sheet_name: str, start_cell: str = "A1", end_cell: Optional[str] = None, include_validation: bool = True ) -> Dict[str, Any]: """Read data from Excel range with cell metadata including validation rules. Args: filepath: Path to Excel file sheet_name: Name of worksheet start_cell: Starting cell address end_cell: Ending cell address (optional) include_validation: Whether to include validation metadata Returns: Dictionary containing structured cell data with metadata """ try: wb = load_workbook(filepath, read_only=False) if sheet_name not in wb.sheetnames: raise DataError(f"Sheet '{sheet_name}' not found") ws = wb[sheet_name] # Parse start cell if ':' in start_cell: start_cell, end_cell = start_cell.split(':') # Get start coordinates try: start_coords = parse_cell_range(f"{start_cell}:{start_cell}") if not start_coords or not all(coord is not None for coord in start_coords[:2]): raise DataError(f"Invalid start cell reference: {start_cell}") start_row, start_col = start_coords[0], start_coords[1] except ValueError as e: raise DataError(f"Invalid start cell format: {str(e)}") # Determine end coordinates if end_cell: try: end_coords = parse_cell_range(f"{end_cell}:{end_cell}") if not end_coords or not all(coord is not None for coord in end_coords[:2]): raise DataError(f"Invalid end cell reference: {end_cell}") end_row, end_col = end_coords[0], end_coords[1] except ValueError as e: raise DataError(f"Invalid end cell format: {str(e)}") else: # If no end_cell, use the full data range of the sheet if ws.max_row == 1 and ws.max_column == 1 and ws.cell(1, 1).value is None: # Handle empty sheet end_row, end_col = start_row, start_col else: # Use the sheet's own boundaries, but respect the provided start_cell end_row, end_col = ws.max_row, ws.max_column # If start_cell is 'A1' (default), we should find the true start if start_cell == 'A1': start_row, start_col = ws.min_row, ws.min_column # Validate range bounds if start_row > ws.max_row or start_col > ws.max_column: # This case can happen if start_cell is outside the used area on a sheet with data # or on a completely empty sheet. logger.warning( f"Start cell {start_cell} is outside the sheet's data boundary " f"({get_column_letter(ws.min_column)}{ws.min_row}:{get_column_letter(ws.max_column)}{ws.max_row}). " f"No data will be read." ) return {"range": f"{start_cell}:", "sheet_name": sheet_name, "cells": []} # Build structured cell data range_str = f"{get_column_letter(start_col)}{start_row}:{get_column_letter(end_col)}{end_row}" range_data = { "range": range_str, "sheet_name": sheet_name, "cells": [] } for row in range(start_row, end_row + 1): for col in range(start_col, end_col + 1): cell = ws.cell(row=row, column=col) cell_address = f"{get_column_letter(col)}{row}" cell_data = { "address": cell_address, "value": cell.value, "row": row, "column": col } # Add validation metadata if requested if include_validation: validation_info = get_data_validation_for_cell(ws, cell_address) if validation_info: cell_data["validation"] = validation_info else: cell_data["validation"] = {"has_validation": False} range_data["cells"].append(cell_data) wb.close() return range_data except DataError as e: logger.error(str(e)) raise except Exception as e: logger.error(f"Failed to read Excel range with metadata: {e}") raise DataError(str(e))