split_column
Split column values by delimiter to extract specific parts or expand into multiple columns for data transformation and organization.
Instructions
Split column values by delimiter.
Returns: ColumnOperationResult with split details
Examples:
# Keep first part of split
split_column(ctx, "full_name", " ", part_index=0)
# Keep second part (for an email address, the domain)
split_column(ctx, "email", "@", part_index=1)
# Expand into multiple columns
split_column(ctx, "address", ",", expand_to_columns=True)
# Expand with custom column names
split_column(ctx, "name", " ", expand_to_columns=True,
new_columns=["first_name", "last_name"])
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| column | Yes | Column name to split values in | |
| delimiter | No | String delimiter to split on | `" "` |
| part_index | No | Which part to keep (0-based index, None for first part) | None |
| expand_to_columns | No | Whether to expand splits into multiple columns | False |
| new_columns | No | Names for new columns when expanding | None |
Implementation Reference
- The main handler function for the split_column tool. It splits values in the specified column by the given delimiter, optionally extracting a specific part (via part_index) or expanding the splits into new columns (via expand_to_columns and new_columns). Uses pandas str.split and handles edge cases like empty delimiters, index out of range, and custom column naming. Returns a ColumnOperationResult.async def split_column( ctx: Annotated[Context, Field(description="FastMCP context for session access")], column: Annotated[str, Field(description="Column name to split values in")], delimiter: Annotated[str, Field(description="String delimiter to split on")] = " ", *, part_index: Annotated[ int | None, Field(description="Which part to keep (0-based index, None for first part)"), ] = None, expand_to_columns: Annotated[ bool, Field(description="Whether to expand splits into multiple columns"), ] = False, new_columns: Annotated[ list[str] | None, Field(description="Names for new columns when expanding"), ] = None, ) -> ColumnOperationResult: """Split column values by delimiter. 
Returns: ColumnOperationResult with split details Examples: # Keep first part of split split_column(ctx, "full_name", " ", part_index=0) # Keep last part split_column(ctx, "email", "@", part_index=1) # Expand into multiple columns split_column(ctx, "address", ",", expand_to_columns=True) # Expand with custom column names split_column(ctx, "name", " ", expand_to_columns=True, new_columns=["first_name", "last_name"]) """ # Get session_id from FastMCP context session_id = ctx.session_id _session, df = get_session_data(session_id) _validate_column_exists(column, df) if not delimiter: msg = "delimiter" raise InvalidParameterError(msg, delimiter, "Delimiter cannot be empty") # Apply split operation # pandas typing limitation: str.split(expand=bool) overload not properly typed in pandas-stubs # See: https://github.com/pandas-dev/pandas-stubs/issues/43 split_data = df[column].astype(str).str.split(delimiter, expand=expand_to_columns) # type: ignore[call-overload] if expand_to_columns: # Expanding to multiple columns if isinstance(split_data, pd.DataFrame): num_parts = len(split_data.columns) columns_created = [] # Use custom column names if provided if new_columns: if len(new_columns) > num_parts: # Truncate to actual number of parts new_columns = new_columns[:num_parts] elif len(new_columns) < num_parts: # Extend with default names for i in range(len(new_columns), num_parts): new_columns.append(f"{column}_part_{i}") column_names = new_columns else: # Generate default column names column_names = [f"{column}_part_{i}" for i in range(num_parts)] # Create new columns for i, col_name in enumerate(column_names): if i < len(split_data.columns): df[col_name] = split_data.iloc[:, i] columns_created.append(col_name) affected_columns = columns_created operation_desc = f"split_expand_{len(columns_created)}_parts" rows_affected = len(df) else: # Shouldn't happen with expand=True, but handle gracefully msg = "expand_to_columns" raise InvalidParameterError( msg, str(expand_to_columns), 
"Split with expand=True did not produce DataFrame", ) else: # Not expanding - keep specific part or first part if part_index is None: part_index = 0 if isinstance(split_data, pd.DataFrame): # This shouldn't happen with expand=False, but handle it if part_index < len(split_data.columns): df[column] = split_data.iloc[:, part_index] else: # Index out of range - fill with NaN df[column] = pd.NA else: # Series of lists - extract specified part def get_part(split_list: Any) -> Any: if isinstance(split_list, list) and len(split_list) > part_index: return split_list[part_index] return pd.NA df[column] = split_data.apply(get_part) affected_columns = [column] operation_desc = f"split_keep_part_{part_index}" # Count successful splits (non-null results) rows_affected = int(df[column].notna().sum()) return ColumnOperationResult( operation=operation_desc, rows_affected=rows_affected, columns_affected=affected_columns, )
- src/databeak/servers/column_text_server.py:533-533 (registration) Registration of the split_column handler as an MCP tool using the FastMCP server instance's tool decorator, explicitly naming it 'split_column'.
column_text_server.tool(name="split_column")(split_column)
- Pydantic schema definitions for the split_column tool parameters using Annotated and Field, which FastMCP uses to generate the tool schema including descriptions and types.
ctx: Annotated[Context, Field(description="FastMCP context for session access")], column: Annotated[str, Field(description="Column name to split values in")], delimiter: Annotated[str, Field(description="String delimiter to split on")] = " ", *, part_index: Annotated[ int | None, Field(description="Which part to keep (0-based index, None for first part)"), ] = None, expand_to_columns: Annotated[ bool, Field(description="Whether to expand splits into multiple columns"), ] = False, new_columns: Annotated[ list[str] | None, Field(description="Names for new columns when expanding"), ] = None, ) -> ColumnOperationResult: