filter_data
Filter data files by applying conditions to specific columns, such as equals, contains, greater_than, or less_than, and optionally save the filtered results.
Instructions
Filter data based on a condition.
Args:
- file_path: Path to the data file
- column: Column name to filter on
- condition: Filter condition (equals, contains, greater_than, less_than)
- value: Value to filter by
- output_path: Optional path to save filtered data
Returns: Information about the filtered data
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
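| file_path | Yes | Path to the data file | |
| column | Yes | Column name to filter on | |
| condition | Yes | Filter condition (equals, contains, greater_than, less_than) | |
| value | Yes | Value to filter by | |
| output_path | No | Optional path to save filtered data | |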
Implementation Reference
- src/visidata_mcp/server.py:323-404 (handler) The filter_data tool implementation: filters data based on column conditions (equals, contains, greater_than, less_than) using pandas, with an optional output path to save the filtered results.
@mcp.tool()
def filter_data(file_path: str, column: str, condition: str, value: str, output_path: Optional[str] = None) -> str:
    """
    Filter data based on a condition.

    Args:
        file_path: Path to the data file. ``.json``, ``.xlsx``/``.xls`` and
            ``.tsv`` files are loaded with the matching pandas reader; ``.csv``
            and any other extension are read as CSV.
        column: Column name to filter on
        condition: Filter condition (equals, contains, greater_than, less_than)
            - equals: the cell's string form must equal ``value`` exactly.
            - contains: case-insensitive literal substring match on the
              cell's string form (``value`` is NOT treated as a regex).
            - greater_than / less_than: ``value`` is parsed with ``float`` and
              the column is coerced to numeric; cells that cannot be coerced
              become NaN and never match.
        value: Value to filter by
        output_path: Optional path to save filtered data. ``.json`` and
            ``.xlsx``/``.xls`` are written in that format; anything else is
            written as CSV.

    Returns:
        Information about the filtered data: a JSON string with
        ``original_rows``, ``filtered_rows``, ``filter_applied`` and, when the
        result was saved, ``saved_to``. On failure, a plain string starting
        with "Error" is returned instead of raising.
    """
    try:
        import pandas as pd
        from pathlib import Path

        file_extension = Path(file_path).suffix.lower()

        # Load with pandas
        if file_extension == '.json':
            df = pd.read_json(file_path)
        elif file_extension in ['.xlsx', '.xls']:
            df = pd.read_excel(file_path)
        elif file_extension == '.tsv':
            df = pd.read_csv(file_path, sep='\t')
        else:
            # '.csv' and every unrecognised extension are parsed as CSV.
            df = pd.read_csv(file_path)

        if column not in df.columns:
            return f"Error: Column '{column}' not found. Available columns: {list(df.columns)}"

        original_rows = len(df)

        # Apply filter
        if condition == "equals":
            filtered_df = df[df[column].astype(str) == value]
        elif condition == "contains":
            # regex=False: the user-supplied value is a literal substring, so
            # characters such as '+', '(' or '.' must not be interpreted as a
            # regular expression (which could raise or match unintended rows).
            matches = df[column].astype(str).str.contains(value, case=False, na=False, regex=False)
            filtered_df = df[matches]
        elif condition in ("greater_than", "less_than"):
            # Only the conversion of the user-supplied value is guarded, so an
            # unrelated failure is not misreported as a conversion problem.
            try:
                numeric_value = float(value)
            except ValueError:
                return f"Error: Cannot convert '{value}' to number for {condition} comparison"

            # Non-numeric cells are coerced to NaN, which compares False.
            numeric_column = pd.to_numeric(df[column], errors='coerce')
            if condition == "greater_than":
                filtered_df = df[numeric_column > numeric_value]
            else:
                filtered_df = df[numeric_column < numeric_value]
        else:
            return f"Error: Unknown condition '{condition}'. Use: equals, contains, greater_than, less_than"

        result = {
            "original_rows": original_rows,
            "filtered_rows": len(filtered_df),
            "filter_applied": f"{column} {condition} {value}"
        }

        # If output path is specified, save filtered data
        if output_path:
            output_extension = Path(output_path).suffix.lower()

            if output_extension == '.json':
                filtered_df.to_json(output_path, orient='records', indent=2)
            elif output_extension in ['.xlsx', '.xls']:
                filtered_df.to_excel(output_path, index=False)
            else:
                # '.csv' and every unrecognised extension default to CSV
                filtered_df.to_csv(output_path, index=False)

            result["saved_to"] = output_path

        return json.dumps(result, indent=2)

    except Exception as e:
        # Tool boundary: report the failure to the caller as text rather than raising.
        return f"Error filtering data: {str(e)}\n{traceback.format_exc()}"

# - src/visidata_mcp/server.py:323-323 (registration) The @mcp.tool() decorator that registers filter_data as an MCP tool
@mcp.tool()