# import_skills.py
#!/usr/bin/env python3
"""
Import skills from GitHub repositories into the Skills Registry
"""
import asyncio
import json
import os
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Any, Dict, List, Optional

# Add parent directory to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
from database import Database
from search import SearchEngine
# Skills to import from GitHub
# Catalog of GitHub repositories to import skills from.
# Each entry has:
#   repo         -- git clone URL (cloned with --depth 1)
#   paths        -- skill directories relative to the repo root; "." means the
#                   repo root itself is a single skill
#   author       -- author_id recorded on every skill imported from this repo
#   verified     -- if True, imported skills are flagged verified in the DB
#   ai_generated -- stored as-is on each created skill
SKILL_SOURCES = [
    # Official Anthropic Skills
    {
        "repo": "https://github.com/anthropics/skills.git",
        "paths": [
            # Document Skills
            "document-skills/docx",
            "document-skills/pdf",
            "document-skills/pptx",
            "document-skills/xlsx",
            "document-skills/markdown",
            # Dev & Code
            "code-execution",
            "code-review",
            "code-explanation",
            "test-generation",
            # Data
            "data-skills/csv-analysis",
            "data-skills/json-parser",
            # Writing & Research
            "research-assistant",
            "summarization",
            # Media
            "audio-transcription",
            "image-captioning",
            # Utility
            "file-manager",
            "task-runner",
            "email-parser",
            "skill-creator",
            "template-skill",
            # Design
            "canvas-design",
            "slack-gif-creator",
            "brand-guidelines",
            "theme-factory",
            "artifacts-builder",
            "mcp-builder",
            "internal-comms",
        ],
        "author": "anthropic",
        "verified": True,
        "ai_generated": False,
    },
    # Obra's Superpowers Collection
    {
        "repo": "https://github.com/obra/superpowers.git",
        "paths": [
            "skills/using-git-worktrees",
            "skills/test-driven-development",
            "skills/subagent-driven-development",
            "skills/executing-plans",
            "skills/finishing-a-development-branch",
            "skills/root-cause-tracing",
            "skills/writing-plans",
            "skills/writing-skills",
            "skills/brainstorming",
            "skills/using-superpowers",
            "skills/sharing-skills",
            "skills/commands",
            "skills/receiving-code-review",
            "skills/requesting-code-review",
            "skills/dispatching-parallel-agents",
            "skills/defense-in-depth",
            "skills/systematic-debugging",
            "skills/testing-anti-patterns",
            "skills/testing-skills-with-subagents",
            "skills/verification-before-completion",
            "skills/condition-based-waiting",
        ],
        "author": "obra",
        "verified": True,
        "ai_generated": False,
    },
    # Composio Awesome Skills
    {
        "repo": "https://github.com/ComposioHQ/awesome-claude-skills.git",
        "paths": [
            "changelog-generator",
            "content-research-writer",
            "image-enhancer",
            "video-downloader",
            "invoice-organizer",
            "raffle-winner-picker",
        ],
        "author": "composio",
        "verified": True,
        "ai_generated": False,
    },
    # Michal Parkola's Tapestry Skills
    {
        "repo": "https://github.com/michalparkola/tapestry-skills-for-claude-code.git",
        "paths": [
            "ship-learn-next",
            "article-extractor",
            "youtube-transcript",
            "tapestry",
        ],
        "author": "michalparkola",
        "verified": True,
        "ai_generated": False,
    },
    # mhattingpete's Skills Marketplace
    {
        "repo": "https://github.com/mhattingpete/claude-skills-marketplace.git",
        "paths": [
            "git-pushing",
            "review-implementing",
            "test-fixing",
        ],
        "author": "mhattingpete",
        "verified": False,
        "ai_generated": False,
    },
    # coffeefuelbump's CSV Summarizer
    {
        "repo": "https://github.com/coffeefuelbump/csv-data-summarizer-claude-skill.git",
        "paths": ["."],
        "author": "coffeefuelbump",
        "verified": False,
        "ai_generated": False,
    },
    # Community Skills (single repos)
    {
        "repo": "https://github.com/smerchek/claude-epub-skill.git",
        "paths": ["."],
        "author": "smerchek",
        "verified": False,
        "ai_generated": False,
    },
    {
        "repo": "https://github.com/jthack/ffuf_claude_skill.git",
        "paths": ["."],
        "author": "jthack",
        "verified": False,
        "ai_generated": False,
    },
]
def clone_repo(repo_url: str, target_dir: str) -> bool:
    """Shallow-clone a git repository into ``target_dir``.

    Uses ``--depth 1`` since only the current tree is needed, not history.

    Args:
        repo_url: Git clone URL (https or file://).
        target_dir: Destination directory; must not already contain a repo.

    Returns:
        True on success, False if the clone fails or git is unavailable.
        Never raises — callers treat a failed clone as a skippable source.
    """
    try:
        print(f"Cloning {repo_url}...")
        subprocess.run(
            ["git", "clone", "--depth", "1", repo_url, target_dir],
            check=True,
            capture_output=True,
            text=True,  # stderr arrives as str; no manual .decode() needed
        )
        return True
    except subprocess.CalledProcessError as e:
        print(f"Failed to clone {repo_url}: {e.stderr}")
        return False
    except FileNotFoundError:
        # git binary missing from PATH — previously this crashed the whole run
        print(f"Failed to clone {repo_url}: git executable not found on PATH")
        return False
def find_skill_md(skill_path: Path) -> Optional[Path]:
    """Locate the skill definition file inside a skill directory.

    Lookup order:
      1. An exact ``SKILL.md``.
      2. Any file named ``skill.md`` in any casing (e.g. ``Skill.MD``) —
         done via ``iterdir()`` because ``glob("*.md")`` is case-sensitive
         on most filesystems and would miss an uppercase ``.MD`` suffix.
      3. ``README.md`` as a fallback.

    Args:
        skill_path: Directory expected to contain the skill.

    Returns:
        Path to the found file, or None when nothing suitable exists.
    """
    # Try exact match first
    skill_md = skill_path / "SKILL.md"
    if skill_md.exists():
        return skill_md
    # Case-insensitive scan of the directory contents
    if skill_path.is_dir():
        for file in skill_path.iterdir():
            if file.is_file() and file.name.upper() == "SKILL.MD":
                return file
    # Try README.md as fallback
    readme = skill_path / "README.md"
    if readme.exists():
        return readme
    return None
def extract_metadata_from_skill(skill_md_path: Path, skill_md_content: str) -> Dict[str, Any]:
    """Derive registry metadata from a skill's SKILL.md content.

    Heuristics:
      * name        -- first ``# `` heading in the first 20 lines, else the
                       parent directory name title-cased.
      * description -- first non-empty, non-heading line after the first
                       heading, within the first 50 lines.
      * category    -- keyword match against the parent directory name.
      * tags        -- keyword match against the full content.

    Args:
        skill_md_path: Path to the SKILL.md file (only its parent dir name is used).
        skill_md_content: Full text of the file.

    Returns:
        Dict with keys ``name``, ``description``, ``category``, ``tags``.
    """
    lines = skill_md_content.split('\n')

    # Name: first "# " heading near the top, else fall back to the folder name.
    heading = next((ln[2:].strip() for ln in lines[:20] if ln.startswith('# ')), None)
    if heading is None:
        name = skill_md_path.parent.name.replace('-', ' ').title()
    else:
        name = heading

    # Description: first prose line that follows the first heading.
    description = ""
    seen_heading = False
    for ln in lines[:50]:
        if ln.startswith('# '):
            seen_heading = True
        elif seen_heading and ln.strip() and not ln.startswith('#'):
            description = ln.strip()
            break

    # Category: first rule whose keywords appear in the folder name
    # (rule order matters — it mirrors the precedence of the checks).
    folder = skill_md_path.parent.name.lower()
    category_rules = [
        ("documents", ("doc", "pdf", "pptx", "xlsx")),
        ("development", ("code", "dev", "git")),
        ("design", ("design", "art", "canvas")),
        ("data", ("data", "csv")),
        ("research", ("research", "article")),
        ("testing", ("test", "debug")),
    ]
    category = next(
        (cat for cat, keywords in category_rules if any(kw in folder for kw in keywords)),
        "general",
    )

    # Tags: substring matches against the lowercased content, in fixed order.
    content_lower = skill_md_content.lower()
    tag_rules = [
        ("python", ("python",)),
        ("javascript", ("javascript", "react")),
        ("git", ("git",)),
        ("testing", ("test",)),
    ]
    tags = [tag for tag, needles in tag_rules if any(n in content_lower for n in needles)]

    return {
        "name": name,
        "description": description or f"Skill: {name}",
        "category": category,
        "tags": tags,
    }
async def import_skills():
    """Import all skills from GitHub repositories.

    For every source in SKILL_SOURCES: shallow-clone the repo into a
    temporary directory, locate each listed skill's SKILL.md, extract
    metadata, create the skill in the database, generate its search
    embedding, and mark skills from verified sources. Failures for a
    single repo or skill are logged and skipped; a summary is printed
    at the end.
    """
    # Initialize database (falls back to the local dev connection string)
    db_url = os.getenv("DATABASE_URL", "postgresql://skills:skills_dev_password@localhost:5432/skills_registry")
    db = Database(db_url)
    # Initialize search engine; API keys may be None — presumably SearchEngine
    # degrades gracefully without them (TODO confirm against SearchEngine)
    openai_key = os.getenv("OPENAI_API_KEY")
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    search = SearchEngine(db, openai_key, anthropic_key)
    total_imported = 0
    total_failed = 0
    # Create temporary directory for cloning repos; removed automatically on exit
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        for source in SKILL_SOURCES:
            repo_url = source["repo"]
            # Derive a local dir name from the URL ("…/foo/bar.git" -> "bar")
            repo_name = repo_url.split('/')[-1].replace('.git', '')
            clone_path = temp_path / repo_name
            # Clone repository; a failed clone skips the whole source
            if not clone_repo(repo_url, str(clone_path)):
                print(f"Skipping {repo_url} due to clone failure")
                continue
            # Import each skill from this repo
            for skill_path_str in source["paths"]:
                skill_path = clone_path / skill_path_str
                if not skill_path.exists():
                    print(f"⚠️ Path not found: {skill_path}")
                    total_failed += 1
                    continue
                # Find SKILL.md (may fall back to README.md, or return None)
                skill_md_path = find_skill_md(skill_path)
                if not skill_md_path:
                    print(f"⚠️ No SKILL.md found in: {skill_path}")
                    total_failed += 1
                    continue
                # Read skill content
                try:
                    with open(skill_md_path, 'r', encoding='utf-8') as f:
                        skill_md_content = f.read()
                except Exception as e:
                    print(f"⚠️ Failed to read {skill_md_path}: {e}")
                    total_failed += 1
                    continue
                # Extract metadata (name, description, category, tags)
                metadata = extract_metadata_from_skill(skill_md_path, skill_md_content)
                # Create skill in database; DB and embedding failures are
                # caught together below so one bad skill doesn't stop the run
                try:
                    skill_id = await db.create_skill(
                        name=metadata["name"],
                        description=metadata["description"],
                        skill_md_content=skill_md_content,
                        category=metadata["category"],
                        tags=metadata["tags"],
                        author_id=source["author"],
                        visibility="public",
                        ai_generated=source["ai_generated"]
                    )
                    # Generate embedding for semantic search
                    await search.generate_embedding(skill_id)
                    # Mark as verified if from trusted source
                    if source["verified"]:
                        # NOTE(review): reaches into Database internals for a raw
                        # connection — consider exposing a public update helper
                        conn = db._get_conn()
                        try:
                            with conn.cursor() as cur:
                                cur.execute(
                                    "UPDATE skills SET verified = TRUE WHERE skill_id = %s",
                                    (skill_id,)
                                )
                            conn.commit()
                        finally:
                            conn.close()
                    print(f"✅ Imported: {metadata['name']} ({skill_id})")
                    total_imported += 1
                except Exception as e:
                    print(f"⚠️ Failed to import {metadata['name']}: {e}")
                    total_failed += 1
    # Final summary
    print("\n" + "="*60)
    print(f"Import complete!")
    print(f"✅ Successfully imported: {total_imported}")
    print(f"⚠️ Failed: {total_failed}")
    print("="*60)
if __name__ == "__main__":
    # Script entry point: run the async importer to completion.
    asyncio.run(import_skills())