Skip to main content
Glama
indexing_engine.py•6.23 kB
"""Hybrid RAG indexing engine combining vector search and knowledge graph.""" from dataclasses import dataclass from typing import Optional from mcp_skills.services.skill_manager import Skill @dataclass class ScoredSkill: """Skill with relevance score. Attributes: skill: The Skill object score: Relevance score (0.0-1.0) match_type: Type of match (vector, graph, hybrid) """ skill: Skill score: float match_type: str @dataclass class IndexStats: """Index statistics. Attributes: total_skills: Total number of indexed skills vector_store_size: Size of vector store in bytes graph_nodes: Number of nodes in knowledge graph graph_edges: Number of edges in knowledge graph last_indexed: Timestamp of last indexing operation """ total_skills: int vector_store_size: int graph_nodes: int graph_edges: int last_indexed: str class IndexingEngine: """Build and maintain vector + KG indices for skill discovery. Combines vector embeddings for semantic search with knowledge graph for relationship-based discovery. Architecture: - Vector Store: ChromaDB or Qdrant for semantic similarity - Knowledge Graph: NetworkX for skill relationships - Embeddings: sentence-transformers/all-MiniLM-L6-v2 """ def __init__( self, vector_backend: str = "chromadb", graph_backend: str = "networkx" ) -> None: """Initialize indexing engine. Args: vector_backend: Vector store backend (chromadb, qdrant, faiss) graph_backend: Knowledge graph backend (networkx, neo4j) """ self.vector_backend = vector_backend self.graph_backend = graph_backend # TODO: Initialize vector store and graph connections def index_skill(self, skill: Skill) -> None: """Add skill to vector + KG stores. Args: skill: Skill object to index """ # TODO: Implement skill indexing # 1. Generate embeddings for skill content # 2. Add to vector store with metadata # 3. Create/update graph nodes and edges # 4. Store in metadata database pass def build_embeddings(self, skill: Skill) -> list[float]: """Generate embeddings from skill content. 
Combines name, description, instructions, and examples into embeddings using sentence-transformers. Args: skill: Skill to generate embeddings for Returns: Embedding vector as list of floats """ # TODO: Implement embedding generation # 1. Concatenate skill text fields # 2. Use sentence-transformers to generate embeddings # 3. Return embedding vector return [] def extract_relationships(self, skill: Skill) -> list[tuple[str, str, str]]: """Identify skill dependencies and relationships. Args: skill: Skill to extract relationships from Returns: List of (source_id, relation_type, target_id) tuples """ # TODO: Implement relationship extraction # 1. Parse dependencies field # 2. Identify category relationships # 3. Detect tag-based relationships # 4. Find toolchain associations # 5. Return list of relationships return [] def reindex_all(self, force: bool = False) -> IndexStats: """Rebuild indices from scratch. Args: force: Force rebuild even if indices exist Returns: Index statistics after rebuild """ # TODO: Implement full reindexing # 1. Clear existing indices (if force=True) # 2. Discover all skills # 3. Generate embeddings for all skills # 4. Build knowledge graph # 5. Return statistics return IndexStats( total_skills=0, vector_store_size=0, graph_nodes=0, graph_edges=0, last_indexed="never", ) def search( self, query: str, toolchain: Optional[str] = None, category: Optional[str] = None, top_k: int = 10, ) -> list[ScoredSkill]: """Search skills using vector similarity + KG. Hybrid search combines: 1. Vector similarity for semantic matching 2. Knowledge graph for relationship-based discovery 3. Reranking based on toolchain and category filters Args: query: Search query (natural language) toolchain: Optional toolchain filter (Python, TypeScript, etc.) category: Optional category filter (testing, debugging, etc.) top_k: Maximum number of results Returns: List of ScoredSkill objects sorted by relevance """ # TODO: Implement hybrid search # 1. Generate query embedding # 2. 
Vector search in ChromaDB/Qdrant # 3. Graph-based related skills # 4. Combine and rerank results # 5. Apply filters (toolchain, category) # 6. Return top_k results return [] def get_related_skills(self, skill_id: str, max_depth: int = 2) -> list[Skill]: """Find related skills via knowledge graph. Traverses graph to find skills connected via dependencies, categories, or tags. Args: skill_id: Starting skill ID max_depth: Maximum traversal depth Returns: List of related Skill objects """ # TODO: Implement graph traversal # 1. Find skill node in graph # 2. BFS/DFS to specified depth # 3. Collect connected skill IDs # 4. Load Skill objects # 5. Return list return [] def get_stats(self) -> IndexStats: """Get current index statistics. Returns: IndexStats object with current metrics """ # TODO: Implement statistics gathering return IndexStats( total_skills=0, vector_store_size=0, graph_nodes=0, graph_edges=0, last_indexed="never", )

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/bobmatnyc/mcp-skills'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.