analyze_skills_by_location
Analyze skill frequency and distribution across geographic locations to identify regional talent patterns and workforce capabilities.
Instructions
Analyze skill frequency and distribution by location.
Args: file_path: Path to the data file · skills_column: Column name containing comma-separated skills · location_column: Column name containing location information · output_path: Optional path to save the analysis results
Returns: Skills analysis by location
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| file_path | Yes | ||
| skills_column | Yes | ||
| location_column | Yes | ||
| output_path | No | | |
Implementation Reference
- src/visidata_mcp/server.py:1099-1197 (handler) — Complete implementation of the analyze_skills_by_location tool: analyzes skill frequency and distribution across locations from tabular data, parsing comma-separated skills and generating statistics including top skills per location, job counts, and percentages.
@mcp.tool()
def analyze_skills_by_location(file_path: str, skills_column: str, location_column: str, output_path: Optional[str] = None) -> str:
    """
    Analyze skill frequency and distribution by location.

    Args:
        file_path: Path to the data file (.csv, .json, .xlsx/.xls, .tsv;
            any other extension is read as CSV).
        skills_column: Column name containing comma-separated skills.
        location_column: Column name containing location information.
        output_path: Optional path to save the analysis results
            (.json and .xlsx/.xls are honored; anything else is written as CSV).

    Returns:
        A JSON string with per-location skill statistics (top-10 skills,
        mention counts, job-posting counts), or an error message string.
    """
    # Imported here (not relied on from module scope) so the handler is
    # self-contained; traceback must be in scope for the except clause.
    import json
    import traceback
    try:
        import pandas as pd
        from pathlib import Path
        from collections import defaultdict, Counter

        # Dispatch the loader on file extension; unknown types fall back to CSV.
        file_extension = Path(file_path).suffix.lower()
        if file_extension == '.json':
            df = pd.read_json(file_path)
        elif file_extension in ('.xlsx', '.xls'):
            df = pd.read_excel(file_path)
        elif file_extension == '.tsv':
            df = pd.read_csv(file_path, sep='\t')
        else:  # '.csv' and any unrecognized extension
            df = pd.read_csv(file_path)

        if skills_column not in df.columns:
            return f"Error: Column '{skills_column}' not found in data"
        if location_column not in df.columns:
            return f"Error: Column '{location_column}' not found in data"

        # Single pass over the rows: collect skills AND count job postings
        # per location. (The original recomputed job_postings with a full
        # df.iterrows() scan for every location, i.e. O(rows * locations).)
        location_skills = defaultdict(list)
        location_postings = Counter()
        for _, row in df.iterrows():
            location = row[location_column]
            skills_str = row[skills_column]
            # Skip rows with a missing location or missing skills cell.
            if pd.isna(location) or pd.isna(skills_str):
                continue
            skills = [skill.strip() for skill in str(skills_str).split(',') if skill.strip()]
            location_skills[location].extend(skills)
            location_postings[location] += 1

        # Per-location statistics: totals, uniques, and top-10 skills with
        # their share of all skill mentions at that location.
        analysis_results = []
        for location, skills_list in location_skills.items():
            skill_counts = Counter(skills_list)
            total_skills = len(skills_list)
            top_skills = skill_counts.most_common(10)
            analysis_results.append({
                "location": location,
                "total_skill_mentions": total_skills,
                "unique_skills": len(skill_counts),
                "job_postings": location_postings[location],
                "top_skills": [
                    {
                        "skill": skill,
                        "count": count,
                        "percentage": round(count / total_skills * 100, 2),
                    }
                    for skill, count in top_skills
                ],
            })

        # Most skill-heavy locations first.
        analysis_results.sort(key=lambda x: x["total_skill_mentions"], reverse=True)

        # Optionally persist the full analysis, format chosen by extension.
        if output_path:
            analysis_df = pd.DataFrame(analysis_results)
            if output_path.endswith('.json'):
                with open(output_path, 'w') as f:
                    json.dump(analysis_results, f, indent=2)
            elif output_path.endswith(('.xlsx', '.xls')):
                analysis_df.to_excel(output_path, index=False)
            else:  # '.csv' and any other extension
                analysis_df.to_csv(output_path, index=False)

        result = {
            "analysis_completed": True,
            "locations_analyzed": len(analysis_results),
            "total_locations": len(location_skills),
            "analysis_data": analysis_results[:10],  # first 10 locations as a preview
            "output_file": output_path if output_path else None,
        }
        return json.dumps(result, indent=2)
    except Exception as e:
        # Best-effort tool: report the failure as a string rather than raising.
        return f"Error analyzing skills by location: {str(e)}\n{traceback.format_exc()}"