Excel MCP Server
by fish0710
Verified
import logging
import re
from typing import Any
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.worksheet import Worksheet
from .cell_utils import parse_cell_range, validate_cell_reference
from .exceptions import ValidationError
logger = logging.getLogger(__name__)
def validate_formula_in_cell_operation(
filepath: str,
sheet_name: str,
cell: str,
formula: str
) -> dict[str, Any]:
"""Validate Excel formula before writing"""
try:
wb = load_workbook(filepath)
if sheet_name not in wb.sheetnames:
raise ValidationError(f"Sheet '{sheet_name}' not found")
if not validate_cell_reference(cell):
raise ValidationError(f"Invalid cell reference: {cell}")
# First validate the provided formula's syntax
is_valid, message = validate_formula(formula)
if not is_valid:
raise ValidationError(f"Invalid formula syntax: {message}")
# Additional validation for cell references in formula
cell_refs = re.findall(r'[A-Z]+[0-9]+(?::[A-Z]+[0-9]+)?', formula)
for ref in cell_refs:
if ':' in ref: # Range reference
start, end = ref.split(':')
if not (validate_cell_reference(start) and validate_cell_reference(end)):
raise ValidationError(f"Invalid cell range reference in formula: {ref}")
else: # Single cell reference
if not validate_cell_reference(ref):
raise ValidationError(f"Invalid cell reference in formula: {ref}")
# Now check if there's a formula in the cell and compare
sheet = wb[sheet_name]
cell_obj = sheet[cell]
current_formula = cell_obj.value
# If cell has a formula (starts with =)
if isinstance(current_formula, str) and current_formula.startswith('='):
if formula.startswith('='):
if current_formula != formula:
return {
"message": "Formula is valid but doesn't match cell content",
"valid": True,
"matches": False,
"cell": cell,
"provided_formula": formula,
"current_formula": current_formula
}
else:
if current_formula != f"={formula}":
return {
"message": "Formula is valid but doesn't match cell content",
"valid": True,
"matches": False,
"cell": cell,
"provided_formula": formula,
"current_formula": current_formula
}
else:
return {
"message": "Formula is valid and matches cell content",
"valid": True,
"matches": True,
"cell": cell,
"formula": formula
}
else:
return {
"message": "Formula is valid but cell contains no formula",
"valid": True,
"matches": False,
"cell": cell,
"provided_formula": formula,
"current_content": str(current_formula) if current_formula else ""
}
except ValidationError as e:
logger.error(str(e))
raise
except Exception as e:
logger.error(f"Failed to validate formula: {e}")
raise ValidationError(str(e))
def validate_range_in_sheet_operation(
filepath: str,
sheet_name: str,
start_cell: str,
end_cell: str | None = None,
) -> dict[str, Any]:
"""Validate if a range exists in a worksheet and return data range info."""
try:
wb = load_workbook(filepath)
if sheet_name not in wb.sheetnames:
raise ValidationError(f"Sheet '{sheet_name}' not found")
worksheet = wb[sheet_name]
# Get actual data dimensions
data_max_row = worksheet.max_row
data_max_col = worksheet.max_column
# Validate range
try:
start_row, start_col, end_row, end_col = parse_cell_range(start_cell, end_cell)
except ValueError as e:
raise ValidationError(f"Invalid range: {str(e)}")
# If end not specified, use start
if end_row is None:
end_row = start_row
if end_col is None:
end_col = start_col
# Validate bounds against maximum possible Excel limits
is_valid, message = validate_range_bounds(
worksheet, start_row, start_col, end_row, end_col
)
if not is_valid:
raise ValidationError(message)
range_str = f"{start_cell}" if end_cell is None else f"{start_cell}:{end_cell}"
data_range_str = f"A1:{get_column_letter(data_max_col)}{data_max_row}"
# Check if range is within data or extends beyond
extends_beyond_data = (
end_row > data_max_row or
end_col > data_max_col
)
return {
"message": (
f"Range '{range_str}' is valid. "
f"Sheet contains data in range '{data_range_str}'"
),
"valid": True,
"range": range_str,
"data_range": data_range_str,
"extends_beyond_data": extends_beyond_data,
"data_dimensions": {
"max_row": data_max_row,
"max_col": data_max_col,
"max_col_letter": get_column_letter(data_max_col)
}
}
except ValidationError as e:
logger.error(str(e))
raise
except Exception as e:
logger.error(f"Failed to validate range: {e}")
raise ValidationError(str(e))
def validate_formula(formula: str) -> tuple[bool, str]:
"""Validate Excel formula syntax and safety"""
if not formula.startswith("="):
return False, "Formula must start with '='"
# Remove the '=' prefix for validation
formula = formula[1:]
# Check for balanced parentheses
parens = 0
for c in formula:
if c == "(":
parens += 1
elif c == ")":
parens -= 1
if parens < 0:
return False, "Unmatched closing parenthesis"
if parens > 0:
return False, "Unclosed parenthesis"
# Basic function name validation
func_pattern = r"([A-Z]+)\("
funcs = re.findall(func_pattern, formula)
unsafe_funcs = {"INDIRECT", "HYPERLINK", "WEBSERVICE", "DGET", "RTD"}
for func in funcs:
if func in unsafe_funcs:
return False, f"Unsafe function: {func}"
return True, "Formula is valid"
def validate_range_bounds(
worksheet: Worksheet,
start_row: int,
start_col: int,
end_row: int | None = None,
end_col: int | None = None,
) -> tuple[bool, str]:
"""Validate that cell range is within worksheet bounds"""
max_row = worksheet.max_row
max_col = worksheet.max_column
try:
# Check start cell bounds
if start_row < 1 or start_row > max_row:
return False, f"Start row {start_row} out of bounds (1-{max_row})"
if start_col < 1 or start_col > max_col:
return False, (
f"Start column {get_column_letter(start_col)} "
f"out of bounds (A-{get_column_letter(max_col)})"
)
# If end cell specified, check its bounds
if end_row is not None and end_col is not None:
if end_row < start_row:
return False, "End row cannot be before start row"
if end_col < start_col:
return False, "End column cannot be before start column"
if end_row > max_row:
return False, f"End row {end_row} out of bounds (1-{max_row})"
if end_col > max_col:
return False, (
f"End column {get_column_letter(end_col)} "
f"out of bounds (A-{get_column_letter(max_col)})"
)
return True, "Range is valid"
except Exception as e:
return False, f"Invalid range: {e!s}"