Skip to main content
Glama

parse_skills_column

Extract and encode individual skills from comma-separated columns in data files for analysis and visualization.

Instructions

Parse comma-separated skills into individual skills and create one-hot encoding.

Args: `file_path` — path to the data file; `skills_column` — name of the column containing comma-separated skills; `output_path` — optional path to save the processed data.

Returns: Information about the parsed skills data

Input Schema

Table | JSON Schema
| Name | Required | Description | Default |
| --- | --- | --- | --- |
| file_path | Yes | | |
| skills_column | Yes | | |
| output_path | No | | |

Implementation Reference

  • The parse_skills_column tool is registered with the MCP server using the @mcp.tool() decorator
    @mcp.tool()
  • The parse_skills_column function implements the logic to parse comma-separated skills from a column and create one-hot encoding columns for each unique skill
    def parse_skills_column(file_path: str, skills_column: str, output_path: Optional[str] = None) -> str:
        """
        Parse comma-separated skills into individual skills and create one-hot encoding.

        The input file is loaded according to its extension (``.json``,
        ``.xlsx``/``.xls``, ``.tsv``; anything else is read as CSV). Every
        distinct skill found in ``skills_column`` gets an indicator column
        named ``skill_<name>``, where ``<name>`` is the skill lower-cased with
        spaces and hyphens replaced by underscores. Skills that normalise to
        the same name (e.g. ``SQL`` and ``sql``) share one column.

        Args:
            file_path: Path to the data file
            skills_column: Column name containing comma-separated skills
            output_path: Optional path to save the processed data. The format
                is chosen from the extension (case-insensitive); unknown
                extensions are written as CSV.

        Returns:
            A JSON string summarising the parsed skills, or a string starting
            with ``"Error"`` if the column is missing or processing fails.
        """
        # Imported locally so the function does not depend on module-level
        # imports that are outside this block.
        import json
        import traceback

        try:
            import pandas as pd
            from pathlib import Path

            def _split_skills(cell):
                """Return the stripped, non-empty skills held in one cell."""
                if pd.isna(cell):
                    return []
                return [part.strip() for part in str(cell).split(',') if part.strip()]

            def _column_for(skill):
                """Return the one-hot column name used for a skill."""
                return f"skill_{skill.replace(' ', '_').replace('-', '_').lower()}"

            # Load the data
            file_extension = Path(file_path).suffix.lower()
            if file_extension == '.json':
                df = pd.read_json(file_path)
            elif file_extension in ('.xlsx', '.xls'):
                df = pd.read_excel(file_path)
            elif file_extension == '.tsv':
                df = pd.read_csv(file_path, sep='\t')
            else:
                # '.csv' and any unrecognised extension are read as CSV.
                df = pd.read_csv(file_path)

            if skills_column not in df.columns:
                return f"Error: Column '{skills_column}' not found in data"

            # Split every cell once; the result feeds both the vocabulary and
            # the indicator matrix.
            parsed_rows = [_split_skills(cell) for cell in df[skills_column]]
            all_skills = sorted({skill for row in parsed_rows for skill in row})

            # Several skills may map to one column name, so de-duplicate while
            # keeping the sorted-skill order.
            column_names = list(dict.fromkeys(_column_for(skill) for skill in all_skills))

            # Build the indicators by row *position*. Writing through
            # `.loc[position, ...]` would treat the position as an index label
            # and hit the wrong rows (or append new ones) whenever the frame
            # does not have a default 0..n-1 index.
            row_count = len(df)
            indicators = {name: [0] * row_count for name in column_names}
            for position, row_skills in enumerate(parsed_rows):
                for skill in row_skills:
                    indicators[_column_for(skill)][position] = 1

            skills_df = df.copy()
            for name in column_names:
                # Plain lists are assigned positionally, independent of the index.
                skills_df[name] = indicators[name]

            # Save processed data if output path provided
            if output_path:
                # Lower-case so 'OUT.JSON' is handled like 'out.json',
                # matching how the input extension is treated.
                target = output_path.lower()
                if target.endswith('.json'):
                    skills_df.to_json(output_path, orient='records', indent=2)
                elif target.endswith(('.xlsx', '.xls')):
                    skills_df.to_excel(output_path, index=False)
                else:
                    skills_df.to_csv(output_path, index=False)

            result = {
                "skills_parsed": True,
                "original_column": skills_column,
                "unique_skills_count": len(all_skills),
                "unique_skills": all_skills[:20],  # First 20 skills for preview
                "rows_processed": len(df),
                # Count columns actually added: skills sharing a normalised
                # name, or matching an existing column, do not add a new one.
                "new_columns_added": len(skills_df.columns) - len(df.columns),
                "output_file": output_path if output_path else None
            }

            return json.dumps(result, indent=2)

        except Exception as e:
            return f"Error parsing skills: {str(e)}\n{traceback.format_exc()}"

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/moeloubani/visidata-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.