filter_rows
Filter rows in a CSV dataset by applying conditions with AND or OR logic to include only matching records.
Instructions
Filter rows based on conditions.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| session_id | Yes | ||
| conditions | Yes | ||
| mode | No | and |
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
No arguments | |||
Implementation Reference
- Core handler function for filter_rows tool. Takes session_id, conditions (list of dicts with column/operator/value), and mode ('and'/'or'). Uses pandas to build a boolean mask from conditions, applies it to the DataFrame, records the operation, and returns row counts.
async def filter_rows( session_id: str, conditions: list[dict[str, Any]], mode: str = "and", ctx: Context = None ) -> dict[str, Any]: """ Filter rows based on conditions. Args: session_id: Session identifier conditions: List of filter conditions, each with: - column: Column name - operator: One of '==', '!=', '>', '<', '>=', '<=', 'contains', 'starts_with', 'ends_with', 'in', 'not_in', 'is_null', 'not_null' - value: Value to compare (not needed for is_null/not_null) mode: 'and' or 'or' to combine multiple conditions ctx: FastMCP context Returns: Dict with success status and filtered row count """ try: manager = get_session_manager() session = manager.get_session(session_id) if not session or session.df is None: return {"success": False, "error": "Invalid session or no data loaded"} df = session.df mask = pd.Series([True] * len(df)) for condition in conditions: column = condition.get("column") operator = condition.get("operator") value = condition.get("value") if column not in df.columns: return {"success": False, "error": f"Column '{column}' not found"} col_data = df[column] if operator == "==": condition_mask = col_data == value elif operator == "!=": condition_mask = col_data != value elif operator == ">": condition_mask = col_data > value elif operator == "<": condition_mask = col_data < value elif operator == ">=": condition_mask = col_data >= value elif operator == "<=": condition_mask = col_data <= value elif operator == "contains": condition_mask = col_data.astype(str).str.contains(str(value), na=False) elif operator == "starts_with": condition_mask = col_data.astype(str).str.startswith(str(value), na=False) elif operator == "ends_with": condition_mask = col_data.astype(str).str.endswith(str(value), na=False) elif operator == "in": condition_mask = col_data.isin(value if isinstance(value, list) else [value]) elif operator == "not_in": condition_mask = ~col_data.isin(value if isinstance(value, list) else [value]) elif operator == "is_null": condition_mask = col_data.isna() elif operator == "not_null": condition_mask = col_data.notna() else: return {"success": False, "error": f"Unknown operator: {operator}"} if mode == "and": mask = mask & condition_mask else: mask = mask | condition_mask session.df = df[mask].reset_index(drop=True) session.record_operation( OperationType.FILTER, { "conditions": conditions, "mode": mode, "rows_before": len(df), "rows_after": len(session.df), }, ) return { "success": True, "rows_before": len(df), "rows_after": len(session.df), "rows_filtered": len(df) - len(session.df), } except Exception as e: logger.error(f"Error filtering rows: {e!s}") return {"success": False, "error": str(e)} - src/csv_editor/server.py:197-202 (registration)MCP tool registration decorator wrapping filter_rows. Imports _filter_rows from transformations module and delegates the call.
@mcp.tool async def filter_rows( session_id: str, conditions: list[dict[str, Any]], mode: str = "and", ctx: Context = None ) -> dict[str, Any]: """Filter rows based on conditions.""" return await _filter_rows(session_id, conditions, mode, ctx) - src/csv_editor/server.py:57-94 (registration)Server info listing 'filter_rows' as a data_manipulation capability.
return { "name": "CSV Editor", "version": "2.0.0", "description": "A comprehensive MCP server for CSV file operations and data analysis", "capabilities": { "data_io": [ "load_csv", "load_csv_from_url", "load_csv_from_content", "export_csv", "multiple_export_formats", ], "data_manipulation": [ "filter_rows", "sort_data", "select_columns", "rename_columns", "add_column", "remove_columns", "change_column_type", "fill_missing_values", "remove_duplicates", ], "data_analysis": [ "get_statistics", "correlation_matrix", "group_by_aggregate", "value_counts", "detect_outliers", "profile_data", ], "data_validation": ["validate_schema", "check_data_quality", "find_anomalies"], "session_management": ["multi_session_support", "session_isolation", "auto_cleanup"], }, "supported_formats": ["csv", "tsv", "json", "excel", "parquet", "html", "markdown"], "max_file_size_mb": int(os.getenv("CSV_MAX_FILE_SIZE", "1024")), "session_timeout_minutes": int(os.getenv("CSV_SESSION_TIMEOUT", "60")), } - OperationType enum defining FILTER as 'filter' used for recording filter operations in session history.
class OperationType(str, Enum): """Types of operations that can be performed.""" LOAD = "load" FILTER = "filter" SORT = "sort" TRANSFORM = "transform" AGGREGATE = "aggregate" EXPORT = "export" ANALYZE = "analyze" UPDATE_COLUMN = "update_column" ADD_COLUMN = "add_column" REMOVE_COLUMN = "remove_column" RENAME = "rename" SELECT = "select" CHANGE_TYPE = "change_type" FILL_MISSING = "fill_missing" REMOVE_DUPLICATES = "remove_duplicates" GROUP_BY = "group_by" VALIDATE = "validate" PROFILE = "profile" QUALITY_CHECK = "quality_check" ANOMALY_DETECTION = "anomaly_detection"