Skill Retriever

index_local_skill.py•4.96 KiB

"""
Index a local skill into the skill-retriever data store.

Adds the skill to metadata.json, the FAISS vector index, id_mapping.json,
and graph.json.

Usage:
    uv run python scripts/index_local_skill.py
"""

import json
import sys
from datetime import datetime, timezone
from pathlib import Path

import faiss
import numpy as np
from fastembed import TextEmbedding

# --- Config ---
SKILL_DIR = Path.home() / "repos" / "skill-retriever" / "skills" / "science-plots"
DATA_DIR = Path.home() / ".skill-retriever" / "data"
COMPONENT_ID = "AnthonyAlcaraz/claude-skills/skill/science-plots"


def parse_frontmatter(text: str) -> tuple:
    """Extract simple ``key: value`` frontmatter from a markdown document.

    The frontmatter is the run of lines between a leading ``---`` line and
    the next ``---`` line. Each line containing a colon is split at the
    first colon; the value is stripped of surrounding whitespace and of
    surrounding double, then single, quotes. Lines without a colon are
    ignored. This is a line-based split, not a YAML parser.

    Args:
        text: Full markdown text.

    Returns:
        A ``(frontmatter, body)`` tuple. ``frontmatter`` is a dict of string
        keys to string values and ``body`` is the stripped text after the
        closing delimiter. When the text does not start with ``---`` or the
        closing ``---`` is missing, returns ``({}, text)`` with ``text``
        unchanged.
    """
    lines = text.strip().split("\n")
    if lines[0].strip() != "---":
        return {}, text

    # Index of the closing delimiter, or -1 when the block is never closed.
    end = next(
        (i for i, line in enumerate(lines[1:], start=1) if line.strip() == "---"),
        -1,
    )
    if end == -1:
        return {}, text

    fm = {}
    for line in lines[1:end]:
        key, sep, val = line.partition(":")
        if sep:
            fm[key.strip()] = val.strip().strip('"').strip("'")

    body = "\n".join(lines[end + 1:]).strip()
    return fm, body


def _build_entry(raw, fm):
    """Return the metadata.json record for the skill.

    ``description``, ``author`` and ``version`` come from the frontmatter
    dict ``fm`` when present; every other field is a fixed value. ``raw`` is
    stored verbatim as ``raw_content``.
    """
    return {
        "id": COMPONENT_ID,
        "name": "science-plots",
        "component_type": "skill",
        "description": fm.get(
            "description",
            "Create publication-quality scientific figures using SciencePlots + matplotlib",
        ),
        "tags": [
            "matplotlib",
            "scientific-plotting",
            "ieee",
            "nature",
            "latex",
            "data-visualization",
        ],
        "author": fm.get("author", "AnthonyAlcaraz"),
        "version": fm.get("version", "1.0"),
        "last_updated": datetime.now(timezone.utc).isoformat(),
        "commit_count": 1,
        "commit_frequency_30d": 0.0,
        "raw_content": raw,
        "parameters": {},
        "dependencies": [],
        "tools": [],
        "source_repo": "AnthonyAlcaraz/claude-skills",
        "source_path": "skills/science-plots/SKILL.md",
        "category": "data-visualization",
        "install_url": None,
        "security_risk_level": "safe",
        "security_risk_score": 0.0,
        "security_findings_count": 0,
        "has_scripts": True,
    }


def main() -> None:
    """Index the configured skill into every skill-retriever store.

    Reads ``SKILL.md`` from ``SKILL_DIR``, then updates ``metadata.json``,
    the FAISS index with its ``id_mapping.json``, and ``graph.json`` under
    ``DATA_DIR``. Exits with status 1 when ``SKILL.md`` does not exist.

    All stores are loaded and the embedding is computed before anything is
    written, so a failure while loading or embedding leaves the data
    directory untouched. The writes themselves are still sequential and not
    atomic.
    """
    # 1. Read skill content
    skill_path = SKILL_DIR / "SKILL.md"
    if not skill_path.exists():
        print(f"ERROR: {skill_path} not found")
        sys.exit(1)

    raw = skill_path.read_text(encoding="utf-8")
    fm, _ = parse_frontmatter(raw)

    # 2. Build metadata entry
    entry = _build_entry(raw, fm)

    # 3. Load every store up front (no writes yet)
    meta_path = DATA_DIR / "metadata.json"
    index_path = DATA_DIR / "vectors" / "faiss_index.bin"
    mapping_path = DATA_DIR / "vectors" / "id_mapping.json"
    graph_path = DATA_DIR / "graph.json"

    metadata = json.loads(meta_path.read_text(encoding="utf-8"))
    index = faiss.read_index(str(index_path))
    mapping = json.loads(mapping_path.read_text(encoding="utf-8"))
    graph = json.loads(graph_path.read_text(encoding="utf-8"))

    # 4. Generate embedding
    print("Generating embedding with BAAI/bge-small-en-v1.5...")
    model = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")
    embeddings = list(model.embed([raw]))
    vector = np.array(embeddings[0], dtype=np.float32).reshape(1, -1)
    print(f"Embedding shape: {vector.shape}")

    # 5. Add to metadata.json
    # Remove existing entry if present (idempotent)
    metadata = [m for m in metadata if m["id"] != COMPONENT_ID]
    metadata.append(entry)
    meta_path.write_text(
        json.dumps(metadata, indent=2, default=str), encoding="utf-8"
    )
    print(f"Metadata: added ({len(metadata)} total components)")

    # 6. Add to FAISS index
    id_to_int = mapping["id_to_int"]
    int_to_id = mapping["int_to_id"]

    if COMPONENT_ID in id_to_int:
        # Rebuild needed for true removal, so the existing vector is kept.
        # NOTE(review): the stored vector is not refreshed here, so an edited
        # SKILL.md keeps its old embedding until the index is rebuilt.
        print(f"Already in FAISS at index {id_to_int[COMPONENT_ID]}, updating mapping only")
    else:
        # Next integer ID: one past the current maximum, or 0 when empty.
        next_int = max(id_to_int.values(), default=-1) + 1
        # IndexIDMap requires add_with_ids
        ids = np.array([next_int], dtype=np.int64)
        index.add_with_ids(vector, ids)
        id_to_int[COMPONENT_ID] = next_int
        # Keys are strings because the mapping round-trips through JSON.
        int_to_id[str(next_int)] = COMPONENT_ID
        print(f"FAISS: added at index {next_int} (total vectors: {index.ntotal})")

    faiss.write_index(index, str(index_path))
    mapping_path.write_text(json.dumps(mapping, indent=2), encoding="utf-8")
    print("Vectors saved")

    # 7. Add to graph.json
    # setdefault so a graph file without a "nodes" key gains one instead of
    # raising KeyError on append.
    nodes = graph.setdefault("nodes", [])
    existing_ids = {n["id"] for n in nodes}
    if COMPONENT_ID not in existing_ids:
        nodes.append({
            "id": COMPONENT_ID,
            "label": "science-plots",
            "component_type": "skill",
        })
        print(f"Graph: added node ({len(nodes)} total)")
    else:
        print("Graph: node already exists")

    graph_path.write_text(
        json.dumps(graph, indent=2, default=str), encoding="utf-8"
    )

    print(f"\nDone! '{COMPONENT_ID}' indexed into skill-retriever.")


if __name__ == "__main__":
    main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/AnthonyAlcaraz/skill-retriever'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

index_local_skill.py•4.96 KiB