create_skills_location_heatmap
Visualize skills distribution across geographic locations to identify regional talent concentrations and skill gaps using heatmap analysis.
Instructions
Create a heatmap showing skills distribution across locations.
Args: file_path: Path to the data file skills_column: Column name containing comma-separated skills location_column: Column name containing location information output_path: Path where to save the heatmap image top_skills: Number of top skills to include (default: 15) top_locations: Number of top locations to include (default: 10)
Returns: Information about the created skills-location heatmap
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
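| file_path | Yes | Path to the data file | |
| skills_column | Yes | Column name containing comma-separated skills | |
| location_column | Yes | Column name containing location information | |
| output_path | Yes | Path where to save the heatmap image | |
| top_skills | No | Number of top skills to include | 15 |
| top_locations | No | Number of top locations to include | 10 |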
Implementation Reference
- src/visidata_mcp/server.py:1200-1314 (handler) The create_skills_location_heatmap tool handler creates a heatmap showing skills distribution across locations. It parses comma-separated skills from a specified column, aggregates them by location, calculates percentages, and generates a visualization using seaborn's heatmap function.
@mcp.tool()
def create_skills_location_heatmap(file_path: str, skills_column: str, location_column: str,
                                   output_path: str, top_skills: int = 15,
                                   top_locations: int = 10) -> str:
    """
    Create a heatmap showing skills distribution across locations.

    Each cell is the share (in percent) that one skill represents among ALL
    skill mentions recorded for that location, including skills that did not
    make it into the top-skills columns. Rows therefore do not necessarily
    sum to 100.

    Args:
        file_path: Path to the data file (.csv, .json, .xlsx/.xls, .tsv;
            any other extension is read as CSV)
        skills_column: Column name containing comma-separated skills
        location_column: Column name containing location information
        output_path: Path where to save the heatmap image
        top_skills: Number of top skills to include (default: 15)
        top_locations: Number of top locations to include (default: 10)

    Returns:
        A JSON string describing the created heatmap on success, or a string
        starting with "Error" when the input is invalid or plotting fails.
    """
    try:
        if not VISUALIZATION_AVAILABLE:
            return f"Error: {VISUALIZATION_ERROR}"

        import pandas as pd
        from pathlib import Path
        from collections import defaultdict, Counter

        # A negative count would make the slice below drop items from the end
        # instead of selecting the top N, and zero yields an empty plot.
        if top_skills < 1 or top_locations < 1:
            return "Error: top_skills and top_locations must be positive integers"

        # Load the data
        file_extension = Path(file_path).suffix.lower()
        if file_extension == '.json':
            df = pd.read_json(file_path)
        elif file_extension in ('.xlsx', '.xls'):
            df = pd.read_excel(file_path)
        elif file_extension == '.tsv':
            df = pd.read_csv(file_path, sep='\t')
        else:
            # '.csv' and unrecognised extensions are both parsed as CSV.
            df = pd.read_csv(file_path)

        if skills_column not in df.columns:
            return f"Error: Column '{skills_column}' not found in data"
        if location_column not in df.columns:
            return f"Error: Column '{location_column}' not found in data"

        # Count skill mentions per location and overall. Only the two needed
        # columns are iterated rather than materialising every row.
        location_skill_counts = defaultdict(Counter)
        all_skills = Counter()

        for location, skills_str in zip(df[location_column], df[skills_column]):
            if pd.isna(location) or pd.isna(skills_str):
                continue

            skills = [skill.strip() for skill in str(skills_str).split(',') if skill.strip()]
            # update() registers the location even when `skills` is empty.
            location_skill_counts[location].update(skills)
            all_skills.update(skills)

        if not all_skills:
            return (f"Error: No skills found in column '{skills_column}' for rows "
                    f"with a value in '{location_column}'")

        # Get top skills and locations
        top_skills_list = [skill for skill, _ in all_skills.most_common(top_skills)]

        location_totals = {
            loc: sum(counts.values()) for loc, counts in location_skill_counts.items()
        }
        # sorted() is stable, so ties keep first-seen order.
        top_locations_list = sorted(
            location_totals, key=lambda loc: location_totals[loc], reverse=True
        )[:top_locations]

        # Build the percentage matrix (rows: locations, columns: skills)
        matrix_data = []
        for location in top_locations_list:
            counts = location_skill_counts[location]
            total_skills_in_location = location_totals[location]
            matrix_data.append([
                (counts[skill] / total_skills_in_location * 100)
                if total_skills_in_location > 0 else 0
                for skill in top_skills_list
            ])

        heatmap_df = pd.DataFrame(matrix_data, index=top_locations_list, columns=top_skills_list)

        # Create the heatmap
        fig = plt.figure(figsize=(max(12, len(top_skills_list) * 0.8),
                                  max(8, len(top_locations_list) * 0.6)))
        try:
            sns.heatmap(heatmap_df, annot=True, fmt='.1f', cmap='YlOrRd',
                        cbar_kws={'label': 'Skill Percentage (%)'}, linewidths=0.5)

            # Report what is actually plotted; the data may contain fewer
            # locations or skills than were requested.
            plt.title(f'Skills Distribution Across Top {len(top_locations_list)} Locations\n'
                      f'(Top {len(top_skills_list)} Skills)')
            plt.xlabel('Skills')
            plt.ylabel('Locations')
            plt.xticks(rotation=45, ha='right')
            plt.yticks(rotation=0)
            plt.tight_layout()

            # Save the plot
            plt.savefig(output_path, dpi=300, bbox_inches='tight')
        finally:
            # Always release the figure, even if drawing or saving failed,
            # so it does not stay registered with pyplot.
            plt.close(fig)

        result = {
            "skills_location_heatmap_created": True,
            "top_skills_analyzed": len(top_skills_list),
            "top_locations_analyzed": len(top_locations_list),
            "skills_included": top_skills_list,
            "locations_included": top_locations_list,
            "output_file": output_path,
            "file_size": Path(output_path).stat().st_size if Path(output_path).exists() else 0
        }

        # Location labels come straight from the data and may not be
        # JSON-native (e.g. timestamps); stringify those rather than failing
        # after the image has already been written.
        return json.dumps(result, indent=2, default=str)

    except Exception as e:
        return f"Error creating skills-location heatmap: {str(e)}\n{traceback.format_exc()}"