parse_skills_column
Extract and encode individual skills from comma-separated columns in data files for analysis and visualization.
Instructions
Parse comma-separated skills into individual skills and create one-hot encoding.
Args: `file_path` – path to the data file; `skills_column` – name of the column containing comma-separated skills; `output_path` – optional path to save the processed data.
Returns: Information about the parsed skills data
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
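| file_path | Yes | Path to the data file | |
| skills_column | Yes | Column name containing comma-separated skills | |
| output_path | No | Optional path to save the processed data | None |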
Implementation Reference
- src/visidata_mcp/server.py:1012-1012 (registration) — The parse_skills_column tool is registered with the MCP server using the @mcp.tool() decorator.
@mcp.tool()
- src/visidata_mcp/server.py:1013-1096 (handler) — The parse_skills_column function parses comma-separated skills from a column and creates one-hot encoding columns for each unique skill.
def parse_skills_column(file_path: str, skills_column: str, output_path: Optional[str] = None) -> str:
    """
    Parse comma-separated skills into individual skills and create one-hot encoding.

    Each distinct skill becomes a ``skill_<name>`` column, where ``<name>`` is
    the skill lower-cased with spaces and hyphens replaced by underscores.
    Skills that normalise to the same name (e.g. "SQL" and "sql") share one
    column. Rows whose skills cell is missing get 0 in every skill column.

    Args:
        file_path: Path to the data file. ``.json``, ``.xlsx``/``.xls`` and
            ``.tsv`` are read with the matching pandas reader; anything else
            is read as CSV.
        skills_column: Column name containing comma-separated skills
        output_path: Optional path to save the processed data. The format is
            chosen from the (case-insensitive) extension the same way as for
            input; unknown extensions are written as CSV.

    Returns:
        A JSON string describing the parsed skills data (unique skills, row
        count, number of columns actually added, output file), or a plain
        "Error ..." string if the column is missing or processing fails.
    """
    try:
        import pandas as pd
        from pathlib import Path

        def split_skills(cell):
            """Return the non-empty, whitespace-trimmed skills in one cell."""
            if pd.isna(cell):
                return []
            return [part.strip() for part in str(cell).split(',') if part.strip()]

        def column_for(skill):
            """Map a skill label to its one-hot column name."""
            return f"skill_{skill.replace(' ', '_').replace('-', '_').lower()}"

        # Load the data; unknown extensions fall back to CSV.
        file_extension = Path(file_path).suffix.lower()
        if file_extension == '.json':
            df = pd.read_json(file_path)
        elif file_extension in ('.xlsx', '.xls'):
            df = pd.read_excel(file_path)
        elif file_extension == '.tsv':
            df = pd.read_csv(file_path, sep='\t')
        else:
            df = pd.read_csv(file_path)

        if skills_column not in df.columns:
            return f"Error: Column '{skills_column}' not found in data"

        # Parse every cell exactly once, in row order.
        parsed_rows = [split_skills(cell) for cell in df[skills_column]]
        all_skills = sorted({skill for skills in parsed_rows for skill in skills})

        # Build the indicator columns as plain lists keyed by row *position*.
        # Assigning a list to a column is positional, so this stays correct
        # when the frame's index is not 0..n-1 (label-based .loc writes with
        # enumerate() positions would append phantom rows in that case).
        encoded = {column_for(skill): [0] * len(df) for skill in all_skills}
        for row_pos, skills in enumerate(parsed_rows):
            for skill in skills:
                encoded[column_for(skill)][row_pos] = 1

        skills_df = df.copy()
        for column, flags in encoded.items():
            skills_df[column] = flags

        # Save processed data if output path provided
        if output_path:
            output_extension = Path(output_path).suffix.lower()
            if output_extension == '.json':
                skills_df.to_json(output_path, orient='records', indent=2)
            elif output_extension in ('.xlsx', '.xls'):
                skills_df.to_excel(output_path, index=False)
            elif output_extension == '.tsv':
                # Mirror the reader: .tsv files are tab-separated.
                skills_df.to_csv(output_path, sep='\t', index=False)
            else:
                skills_df.to_csv(output_path, index=False)

        result = {
            "skills_parsed": True,
            "original_column": skills_column,
            "unique_skills_count": len(all_skills),
            "unique_skills": all_skills[:20],  # First 20 skills for preview
            "rows_processed": len(df),
            # Count real additions: skills that share a normalised name, or
            # that hit a pre-existing column, do not add a column each.
            "new_columns_added": len(skills_df.columns) - len(df.columns),
            "output_file": output_path if output_path else None,
        }

        return json.dumps(result, indent=2)

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error parsing skills: {str(e)}\n{traceback.format_exc()}"