Skip to main content
Glama
base-vector-service.ts8.57 kB
/** * Base Vector Service * * Generic vector operations that can be extended for different data types * (patterns, capabilities, dependencies, etc.) */ import { VectorDBService, VectorDocument } from './vector-db-service'; import { EmbeddingService } from './embedding-service'; export interface BaseSearchOptions { limit?: number; scoreThreshold?: number; keywordWeight?: number; // Weight for keyword vs semantic search } export interface BaseSearchResult<T> { data: T; score: number; matchType: 'keyword' | 'semantic' | 'hybrid'; } export interface SearchMode { semantic: boolean; provider?: string; reason?: string; } /** * Abstract base class for vector-based data services */ export abstract class BaseVectorService<T> { protected vectorDB: VectorDBService; protected embeddingService: EmbeddingService; protected collectionName: string; constructor(collectionName: string, vectorDB?: VectorDBService, embeddingService?: EmbeddingService) { this.collectionName = collectionName; this.vectorDB = vectorDB || new VectorDBService({ collectionName }); this.embeddingService = embeddingService || new EmbeddingService(); } /** * Initialize the collection */ async initialize(): Promise<void> { // Use embedding dimensions if available, otherwise default to 1536 (OpenAI default) const dimensions = this.embeddingService.isAvailable() ? this.embeddingService.getDimensions() : 1536; await this.vectorDB.initializeCollection(dimensions); } /** * Health check for Vector DB connection */ async healthCheck(): Promise<boolean> { return await this.vectorDB.healthCheck(); } /** * Store data in Vector DB with optional semantic embedding */ async storeData(data: T): Promise<void> { const searchText = this.createSearchText(data); const id = this.extractId(data); // Generate embedding - required for vector storage let embedding: number[]; if (this.embeddingService.isAvailable()) { try { embedding = await this.embeddingService.generateEmbedding(searchText); } catch (error) { // Fail immediately with clear error about embedding generation throw new Error(`Embedding generation failed: ${error instanceof Error ? error.message : String(error)}`); } } else { // Embedding service not available - fail with clear error throw new Error('Embedding service not available - cannot store data in vector collection'); } const document: VectorDocument = { id, payload: { ...this.createPayload(data), searchText: searchText, hasEmbedding: true }, vector: embedding }; await this.vectorDB.upsertDocument(document); } /** * Search for data using hybrid semantic + keyword matching */ async searchData( query: string, options: BaseSearchOptions = {} ): Promise<BaseSearchResult<T>[]> { // Fail immediately if embedding service not available - no graceful fallback if (!this.embeddingService.isAvailable()) { throw new Error('Embedding service not available - cannot perform semantic search'); } // Extract keywords for keyword search const queryKeywords = this.extractKeywords(query); if (queryKeywords.length === 0) { return []; } const limit = options.limit || 10; const scoreThreshold = options.scoreThreshold || 0.01; // Perform hybrid search (semantic + keyword) try { return await this.hybridSearch(query, queryKeywords, { limit, scoreThreshold }); } catch (error) { // Fail immediately - no fallback to keyword-only search throw new Error(`Semantic search failed: ${error instanceof Error ? error.message : String(error)}`); } } /** * Get data by ID */ async getData(id: string): Promise<T | null> { const document = await this.vectorDB.getDocument(id); if (!document) { return null; } const data = this.payloadToData(document.payload); // Set the ID from the document (data as any).id = document.id; return data; } /** * Delete data by ID */ async deleteData(id: string): Promise<void> { await this.vectorDB.deleteDocument(id); } /** * Delete all data (recreate collection) */ async deleteAllData(): Promise<void> { await this.vectorDB.deleteAllDocuments(); } /** * Get all data (limited) */ async getAllData(limit?: number): Promise<T[]> { const documents = await this.vectorDB.getAllDocuments(limit); return documents.map(doc => { const data = this.payloadToData(doc.payload); (data as any).id = doc.id; return data; }); } /** * Get total count of data items */ async getDataCount(): Promise<number> { try { const info = await this.vectorDB.getCollectionInfo(); return info.points_count || 0; } catch (error) { // Fallback: get all and count const data = await this.getAllData(); return data.length; } } /** * Get current search mode (semantic vs keyword-only) */ getSearchMode(): SearchMode { const status = this.embeddingService.getStatus(); return { semantic: status.available, provider: status.provider || undefined, reason: status.reason || (status.available ? 'Embedding service available' : undefined) }; } // Abstract methods that must be implemented by subclasses protected abstract createSearchText(data: T): string; protected abstract extractId(data: T): string; protected abstract createPayload(data: T): Record<string, any>; protected abstract payloadToData(payload: Record<string, any>): T; // Virtual methods that can be overridden by subclasses protected extractKeywords(query: string): string[] { return query.toLowerCase().split(/\s+/).filter(word => word.length > 2); } /** * Hybrid search combining semantic and keyword matching */ private async hybridSearch( query: string, queryKeywords: string[], options: { limit: number; scoreThreshold: number } ): Promise<BaseSearchResult<T>[]> { // Generate query embedding - required for semantic search const queryEmbedding = await this.embeddingService.generateEmbedding(query); if (!queryEmbedding) { throw new Error('Failed to generate query embedding for semantic search'); } // Semantic search using vector similarity const semanticResults = await this.vectorDB.searchSimilar( queryEmbedding, { limit: options.limit * 2, // Get more candidates for hybrid ranking scoreThreshold: 0.1 // Very permissive threshold for single-word queries } ); // Keyword search const keywordResults = await this.vectorDB.searchByKeywords( queryKeywords, { limit: options.limit * 2, scoreThreshold: 0.1 } ); // Combine and rank results return this.combineHybridResults(semanticResults, keywordResults, queryKeywords, options); } /** * Combine semantic and keyword results with hybrid ranking */ private combineHybridResults( semanticResults: any[], keywordResults: any[], queryKeywords: string[], options: { limit: number; scoreThreshold: number } ): BaseSearchResult<T>[] { const combinedResults = new Map<string, BaseSearchResult<T>>(); // Add semantic results for (const result of semanticResults) { const data = this.payloadToData(result.payload); (data as any).id = result.id; combinedResults.set(result.id, { data, score: result.score * 0.7, // Weight semantic similarity matchType: 'semantic' }); } // Add or boost keyword results for (const result of keywordResults) { const existing = combinedResults.get(result.id); if (existing) { // Boost score for hybrid match existing.score = Math.max(existing.score, result.score * 0.8); existing.matchType = 'hybrid'; } else { const data = this.payloadToData(result.payload); (data as any).id = result.id; combinedResults.set(result.id, { data, score: result.score * 0.6, // Weight keyword matching matchType: 'keyword' }); } } // Sort by score and apply limits return Array.from(combinedResults.values()) .filter(result => result.score >= options.scoreThreshold) .sort((a, b) => b.score - a.score) .slice(0, options.limit); } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/vfarcic/dot-ai'

If you have feedback or need assistance with the MCP directory API, please join our Discord server