
Code Reference Optimizer MCP Server

by FosterG4
EmbeddingGenerator.ts (36.1 kB)
import * as fs from 'fs/promises';
import * as path from 'path';

import type { CombinedFeatures } from './FeatureExtractor.js';
import type { EmbeddingVector, SymbolEmbedding } from './RelevanceScorer.js';

export interface Word2VecModel {
  vocabulary: Map<string, number>;
  embeddings: Float32Array[];
  dimensions: number;
  windowSize: number;
  minCount: number;
  iterations: number;
}

export interface Doc2VecModel {
  wordModel: Word2VecModel;
  documentEmbeddings: Map<string, Float32Array>;
  alpha: number;
  minAlpha: number;
}

export interface CodeEmbeddingModel {
  tokenEmbeddings: Map<string, Float32Array>;
  structuralEmbeddings: Map<string, Float32Array>;
  semanticEmbeddings: Map<string, Float32Array>;
  dimensions: number;
  combinationWeights: {
    token: number;
    structural: number;
    semantic: number;
  };
}

export interface SimilarityMetrics {
  cosine: number;
  euclidean: number;
  manhattan: number;
  jaccard: number;
  pearson: number;
}

export interface ClusterInfo {
  id: string;
  centroid: Float32Array;
  members: string[];
  cohesion: number;
  separation: number;
  silhouetteScore: number;
}

export interface DimensionalityReduction {
  method: 'pca' | 'tsne' | 'umap';
  originalDimensions: number;
  reducedDimensions: number;
  explainedVariance?: number;
  components?: Float32Array[];
}

export class EmbeddingGenerator {
  private word2vecModel: Word2VecModel | null = null;
  private doc2vecModel: Doc2VecModel | null = null;
  private codeModel: CodeEmbeddingModel | null = null;
  private embeddingCache: Map<string, Float32Array> = new Map();
  private similarityCache: Map<string, SimilarityMetrics> = new Map();

  constructor(
    private dimensions: number = 256,
    private windowSize: number = 5,
    private minCount: number = 2
  ) {}

  /**
   * Train Word2Vec model on code tokens
   */
  async trainWord2Vec(corpus: string[][]): Promise<Word2VecModel> {
    console.log('Training Word2Vec model...');

    // Build vocabulary
    const vocabulary = this.buildVocabulary(corpus);
    console.log(`Vocabulary size: ${vocabulary.size}`);

    // Initialize embeddings randomly
    const embeddings = this.initializeEmbeddings(vocabulary.size);

    // Train using Skip-gram with negative sampling
    await this.trainSkipGram(corpus, vocabulary, embeddings);

    this.word2vecModel = {
      vocabulary,
      embeddings,
      dimensions: this.dimensions,
      windowSize: this.windowSize,
      minCount: this.minCount,
      iterations: 10
    };

    console.log('Word2Vec training completed');
    return this.word2vecModel;
  }

  /**
   * Train Doc2Vec model for document-level embeddings
   */
  async trainDoc2Vec(
    documents: Array<{ id: string; tokens: string[] }>
  ): Promise<Doc2VecModel> {
    console.log('Training Doc2Vec model...');

    // First train Word2Vec on all tokens
    const allTokens = documents.map(doc => doc.tokens);
    const wordModel = await this.trainWord2Vec(allTokens);

    // Train document embeddings
    const documentEmbeddings = new Map<string, Float32Array>();
    for (const doc of documents) {
      const embedding = await this.trainDocumentEmbedding(doc, wordModel);
      documentEmbeddings.set(doc.id, embedding);
    }

    this.doc2vecModel = {
      wordModel,
      documentEmbeddings,
      alpha: 0.025,
      minAlpha: 0.0001
    };

    console.log('Doc2Vec training completed');
    return this.doc2vecModel;
  }

  /**
   * Train specialized code embedding model
   */
  async trainCodeEmbeddings(
    codeData: Array<{
      symbol: string;
      tokens: string[];
      structure: string[];
      semantics: string[];
      features: CombinedFeatures;
    }>
  ): Promise<CodeEmbeddingModel> {
    console.log('Training code embedding model...');

    // Train token embeddings
    const tokenCorpus = codeData.map(item => item.tokens);
    const tokenModel = await this.trainWord2Vec(tokenCorpus);
    const tokenEmbeddings = new Map<string, Float32Array>();
    for (const [token, index] of tokenModel.vocabulary) {
      tokenEmbeddings.set(token, tokenModel.embeddings[index]);
    }

    // Train structural embeddings
    const structuralCorpus = codeData.map(item => item.structure);
    const structuralModel = await this.trainWord2Vec(structuralCorpus);
    const structuralEmbeddings = new Map<string, Float32Array>();
    for (const [token, index] of structuralModel.vocabulary) {
      structuralEmbeddings.set(token, structuralModel.embeddings[index]);
    }

    // Train semantic embeddings
    const semanticCorpus = codeData.map(item => item.semantics);
    const semanticModel = await this.trainWord2Vec(semanticCorpus);
    const semanticEmbeddings = new Map<string, Float32Array>();
    for (const [token, index] of semanticModel.vocabulary) {
      semanticEmbeddings.set(token, semanticModel.embeddings[index]);
    }

    this.codeModel = {
      tokenEmbeddings,
      structuralEmbeddings,
      semanticEmbeddings,
      dimensions: this.dimensions,
      combinationWeights: {
        token: 0.5,
        structural: 0.3,
        semantic: 0.2
      }
    };

    console.log('Code embedding training completed');
    return this.codeModel;
  }

  /**
   * Generate embedding for a symbol using trained models
   */
  async generateSymbolEmbedding(
    symbol: string,
    tokens: string[],
    structure: string[],
    semantics: string[],
    features: CombinedFeatures
  ): Promise<EmbeddingVector> {
    const cacheKey = `${symbol}:${tokens.join(',')}`;
    const cached = this.embeddingCache.get(cacheKey);
    if (cached) {
      return {
        values: Array.from(cached),
        dimensions: cached.length,
        magnitude: this.calculateMagnitude(cached)
      };
    }

    let embedding: Float32Array;

    if (this.codeModel) {
      embedding = this.generateCodeEmbedding(tokens, structure, semantics, features);
    } else if (this.doc2vecModel) {
      embedding = await this.generateDocEmbedding(tokens);
    } else if (this.word2vecModel) {
      embedding = this.generateWordEmbedding(tokens);
    } else {
      embedding = this.generateTfIdfEmbedding(tokens);
    }

    // Normalize the embedding
    this.normalizeEmbedding(embedding);

    // Cache the result
    this.embeddingCache.set(cacheKey, embedding);

    return {
      values: Array.from(embedding),
      dimensions: embedding.length,
      magnitude: this.calculateMagnitude(embedding)
    };
  }

  /**
   * Generate query embedding for search
   */
  async generateQueryEmbedding(query: string): Promise<EmbeddingVector> {
    const tokens = this.tokenizeQuery(query);
    const embedding = await this.generateSymbolEmbedding(
      'query',
      tokens,
      [],
      [],
      {} as CombinedFeatures
    );
    return embedding;
  }

  /**
   * Calculate similarity between two embeddings
   */
  calculateSimilarity(
    embedding1: EmbeddingVector,
    embedding2: EmbeddingVector
  ): SimilarityMetrics {
    const key = `${embedding1.values.join(',')}:${embedding2.values.join(',')}`;
    const cached = this.similarityCache.get(key);
    if (cached) return cached;

    const vec1 = new Float32Array(embedding1.values);
    const vec2 = new Float32Array(embedding2.values);

    const metrics: SimilarityMetrics = {
      cosine: this.cosineSimilarity(vec1, vec2),
      euclidean: this.euclideanDistance(vec1, vec2),
      manhattan: this.manhattanDistance(vec1, vec2),
      jaccard: this.jaccardSimilarity(embedding1.values, embedding2.values),
      pearson: this.pearsonCorrelation(vec1, vec2)
    };

    this.similarityCache.set(key, metrics);
    return metrics;
  }

  /**
   * Find most similar embeddings (exhaustive cosine-similarity scan over all candidates)
   */
  async findSimilarEmbeddings(
    queryEmbedding: EmbeddingVector,
    candidateEmbeddings: Map<string, EmbeddingVector>,
    topK: number = 10,
    threshold: number = 0.1
  ): Promise<Array<{ id: string; similarity: number; metrics: SimilarityMetrics }>> {
    const results: Array<{ id: string; similarity: number; metrics: SimilarityMetrics }> = [];

    for (const [id, embedding] of candidateEmbeddings) {
      const metrics = this.calculateSimilarity(queryEmbedding, embedding);
      if (metrics.cosine >= threshold) {
        results.push({ id, similarity: metrics.cosine, metrics });
      }
    }

    // Sort by similarity and return top K
    results.sort((a, b) => b.similarity - a.similarity);
    return results.slice(0, topK);
  }

  /**
   * Cluster embeddings using K-means
   */
  async clusterEmbeddings(
    embeddings: Map<string, EmbeddingVector>,
    numClusters: number,
    maxIterations: number = 100
  ): Promise<ClusterInfo[]> {
    console.log(`Clustering ${embeddings.size} embeddings into ${numClusters} clusters...`);

    const points = Array.from(embeddings.values()).map(e => new Float32Array(e.values));
    const labels = Array.from(embeddings.keys());

    // Initialize centroids randomly
    const centroids = this.initializeCentroids(points, numClusters);
    const assignments = new Array(points.length).fill(0);

    // K-means iterations
    for (let iter = 0; iter < maxIterations; iter++) {
      let changed = false;

      // Assign points to nearest centroids
      for (let i = 0; i < points.length; i++) {
        const newAssignment = this.findNearestCentroid(points[i], centroids);
        if (newAssignment !== assignments[i]) {
          assignments[i] = newAssignment;
          changed = true;
        }
      }

      if (!changed) break;

      // Update centroids
      this.updateCentroids(points, assignments, centroids);
    }

    // Build cluster info
    const clusters: ClusterInfo[] = [];
    for (let k = 0; k < numClusters; k++) {
      const members = labels.filter((_, i) => assignments[i] === k);
      const cohesion = this.calculateCohesion(points, assignments, k, centroids[k]);
      const separation = this.calculateSeparation(centroids, k);
      const silhouetteScore = this.calculateSilhouetteScore(points, assignments, k);

      clusters.push({
        id: `cluster_${k}`,
        centroid: centroids[k],
        members,
        cohesion,
        separation,
        silhouetteScore
      });
    }

    console.log('Clustering completed');
    return clusters;
  }

  /**
   * Reduce dimensionality of embeddings
   */
  async reduceDimensionality(
    embeddings: Map<string, EmbeddingVector>,
    targetDimensions: number,
    method: 'pca' | 'tsne' | 'umap' = 'pca'
  ): Promise<{ reducedEmbeddings: Map<string, EmbeddingVector>; reduction: DimensionalityReduction }> {
    console.log(`Reducing dimensionality from ${this.dimensions} to ${targetDimensions} using ${method.toUpperCase()}...`);

    const points = Array.from(embeddings.values()).map(e => new Float32Array(e.values));
    const labels = Array.from(embeddings.keys());

    let reducedPoints: Float32Array[];
    let reduction: DimensionalityReduction;

    switch (method) {
      case 'pca':
        const pcaResult = this.performPCA(points, targetDimensions);
        reducedPoints = pcaResult.reducedPoints;
        reduction = {
          method: 'pca',
          originalDimensions: this.dimensions,
          reducedDimensions: targetDimensions,
          explainedVariance: pcaResult.explainedVariance,
          components: pcaResult.components
        };
        break;
      case 'tsne':
        reducedPoints = this.performTSNE(points, targetDimensions);
        reduction = {
          method: 'tsne',
          originalDimensions: this.dimensions,
          reducedDimensions: targetDimensions
        };
        break;
      case 'umap':
        reducedPoints = this.performUMAP(points, targetDimensions);
        reduction = {
          method: 'umap',
          originalDimensions: this.dimensions,
          reducedDimensions: targetDimensions
        };
        break;
      default:
        throw new Error(`Unsupported dimensionality reduction method: ${method}`);
    }

    // Convert back to embedding format
    const reducedEmbeddings = new Map<string, EmbeddingVector>();
    for (let i = 0; i < labels.length; i++) {
      const values = Array.from(reducedPoints[i]);
      reducedEmbeddings.set(labels[i], {
        values,
        dimensions: targetDimensions,
        magnitude: this.calculateMagnitude(reducedPoints[i])
      });
    }

    console.log('Dimensionality reduction completed');
    return { reducedEmbeddings, reduction };
  }

  /**
   * Export trained models
   */
  async exportModels(outputPath: string): Promise<void> {
    const modelData = {
      word2vec: this.word2vecModel ? {
        vocabulary: Array.from(this.word2vecModel.vocabulary.entries()),
        embeddings: this.word2vecModel.embeddings.map(arr => Array.from(arr)),
        dimensions: this.word2vecModel.dimensions,
        windowSize: this.word2vecModel.windowSize,
        minCount: this.word2vecModel.minCount,
        iterations: this.word2vecModel.iterations
      } : null,
      doc2vec: this.doc2vecModel ? {
        wordModel: {
          vocabulary: Array.from(this.doc2vecModel.wordModel.vocabulary.entries()),
          embeddings: this.doc2vecModel.wordModel.embeddings.map(arr => Array.from(arr)),
          dimensions: this.doc2vecModel.wordModel.dimensions
        },
        documentEmbeddings: Array.from(this.doc2vecModel.documentEmbeddings.entries())
          .map(([id, emb]) => [id, Array.from(emb)]),
        alpha: this.doc2vecModel.alpha,
        minAlpha: this.doc2vecModel.minAlpha
      } : null,
      codeModel: this.codeModel ? {
        tokenEmbeddings: Array.from(this.codeModel.tokenEmbeddings.entries())
          .map(([token, emb]) => [token, Array.from(emb)]),
        structuralEmbeddings: Array.from(this.codeModel.structuralEmbeddings.entries())
          .map(([token, emb]) => [token, Array.from(emb)]),
        semanticEmbeddings: Array.from(this.codeModel.semanticEmbeddings.entries())
          .map(([token, emb]) => [token, Array.from(emb)]),
        dimensions: this.codeModel.dimensions,
        combinationWeights: this.codeModel.combinationWeights
      } : null,
      metadata: {
        dimensions: this.dimensions,
        windowSize: this.windowSize,
        minCount: this.minCount,
        createdAt: Date.now()
      }
    };

    await fs.writeFile(outputPath, JSON.stringify(modelData, null, 2));
    console.log(`Models exported to ${outputPath}`);
  }

  /**
   * Import trained models
   */
  async importModels(inputPath: string): Promise<void> {
    const modelData = JSON.parse(await fs.readFile(inputPath, 'utf-8'));

    if (modelData.word2vec) {
      this.word2vecModel = {
        vocabulary: new Map(modelData.word2vec.vocabulary),
        embeddings: modelData.word2vec.embeddings.map((arr: number[]) => new Float32Array(arr)),
        dimensions: modelData.word2vec.dimensions,
        windowSize: modelData.word2vec.windowSize,
        minCount: modelData.word2vec.minCount,
        iterations: modelData.word2vec.iterations
      };
    }

    if (modelData.doc2vec) {
      this.doc2vecModel = {
        wordModel: {
          vocabulary: new Map(modelData.doc2vec.wordModel.vocabulary),
          embeddings: modelData.doc2vec.wordModel.embeddings.map((arr: number[]) => new Float32Array(arr)),
          dimensions: modelData.doc2vec.wordModel.dimensions,
          windowSize: 0,
          minCount: 0,
          iterations: 0
        },
        documentEmbeddings: new Map(
          modelData.doc2vec.documentEmbeddings.map(([id, emb]: [string, number[]]) =>
            [id, new Float32Array(emb)]
          )
        ),
        alpha: modelData.doc2vec.alpha,
        minAlpha: modelData.doc2vec.minAlpha
      };
    }

    if (modelData.codeModel) {
      this.codeModel = {
        tokenEmbeddings: new Map(
          modelData.codeModel.tokenEmbeddings.map(([token, emb]: [string, number[]]) =>
            [token, new Float32Array(emb)]
          )
        ),
        structuralEmbeddings: new Map(
          modelData.codeModel.structuralEmbeddings.map(([token, emb]: [string, number[]]) =>
            [token, new Float32Array(emb)]
          )
        ),
        semanticEmbeddings: new Map(
          modelData.codeModel.semanticEmbeddings.map(([token, emb]: [string, number[]]) =>
            [token, new Float32Array(emb)]
          )
        ),
        dimensions: modelData.codeModel.dimensions,
        combinationWeights: modelData.codeModel.combinationWeights
      };
    }
    console.log(`Models imported from ${inputPath}`);
  }

  // Private helper methods

  private buildVocabulary(corpus: string[][]): Map<string, number> {
    const tokenCounts = new Map<string, number>();

    // Count token frequencies
    for (const sentence of corpus) {
      for (const token of sentence) {
        tokenCounts.set(token, (tokenCounts.get(token) || 0) + 1);
      }
    }

    // Filter by minimum count and build vocabulary
    const vocabulary = new Map<string, number>();
    let index = 0;
    for (const [token, count] of tokenCounts) {
      if (count >= this.minCount) {
        vocabulary.set(token, index++);
      }
    }

    return vocabulary;
  }

  private initializeEmbeddings(vocabularySize: number): Float32Array[] {
    const embeddings: Float32Array[] = [];

    for (let i = 0; i < vocabularySize; i++) {
      const embedding = new Float32Array(this.dimensions);
      // Xavier initialization
      const scale = Math.sqrt(2.0 / this.dimensions);
      for (let j = 0; j < this.dimensions; j++) {
        embedding[j] = (Math.random() - 0.5) * 2 * scale;
      }
      embeddings.push(embedding);
    }

    return embeddings;
  }

  private async trainSkipGram(
    corpus: string[][],
    vocabulary: Map<string, number>,
    embeddings: Float32Array[]
  ): Promise<void> {
    const learningRate = 0.025;
    const negativeSamples = 5;
    const iterations = 10;

    for (let iter = 0; iter < iterations; iter++) {
      console.log(`Training iteration ${iter + 1}/${iterations}`);

      for (const sentence of corpus) {
        for (let i = 0; i < sentence.length; i++) {
          const centerWord = sentence[i];
          const centerIndex = vocabulary.get(centerWord);
          if (centerIndex === undefined) continue;

          // Train on context words
          for (let j = Math.max(0, i - this.windowSize); j <= Math.min(sentence.length - 1, i + this.windowSize); j++) {
            if (i === j) continue;

            const contextWord = sentence[j];
            const contextIndex = vocabulary.get(contextWord);
            if (contextIndex === undefined) continue;

            // Positive sample
            this.updateEmbeddings(embeddings, centerIndex, contextIndex, 1, learningRate);

            // Negative samples
            for (let k = 0; k < negativeSamples; k++) {
              const negativeIndex = Math.floor(Math.random() * embeddings.length);
              if (negativeIndex !== contextIndex) {
                this.updateEmbeddings(embeddings, centerIndex, negativeIndex, 0, learningRate);
              }
            }
          }
        }
      }
    }
  }

  private updateEmbeddings(
    embeddings: Float32Array[],
    centerIndex: number,
    contextIndex: number,
    label: number,
    learningRate: number
  ): void {
    const centerEmb = embeddings[centerIndex];
    const contextEmb = embeddings[contextIndex];

    // Calculate dot product
    let dotProduct = 0;
    for (let i = 0; i < this.dimensions; i++) {
      dotProduct += centerEmb[i] * contextEmb[i];
    }

    // Sigmoid activation
    const sigmoid = 1 / (1 + Math.exp(-dotProduct));
    const gradient = (label - sigmoid) * learningRate;

    // Update embeddings
    for (let i = 0; i < this.dimensions; i++) {
      const centerGrad = gradient * contextEmb[i];
      const contextGrad = gradient * centerEmb[i];
      centerEmb[i] += centerGrad;
      contextEmb[i] += contextGrad;
    }
  }

  private async trainDocumentEmbedding(
    document: { id: string; tokens: string[] },
    wordModel: Word2VecModel
  ): Promise<Float32Array> {
    const embedding = new Float32Array(this.dimensions);
    let count = 0;

    // Average word embeddings
    for (const token of document.tokens) {
      const index = wordModel.vocabulary.get(token);
      if (index !== undefined) {
        const wordEmb = wordModel.embeddings[index];
        for (let i = 0; i < this.dimensions; i++) {
          embedding[i] += wordEmb[i];
        }
        count++;
      }
    }

    // Normalize by count
    if (count > 0) {
      for (let i = 0; i < this.dimensions; i++) {
        embedding[i] /= count;
      }
    }

    return embedding;
  }

  private generateCodeEmbedding(
    tokens: string[],
    structure: string[],
    semantics: string[],
    features: CombinedFeatures
  ): Float32Array {
    if (!this.codeModel) {
      throw new Error('Code model not trained');
    }

    const embedding = new Float32Array(this.dimensions);
    const weights = this.codeModel.combinationWeights;

    // Combine token embeddings
    const tokenEmb = this.averageEmbeddings(tokens, this.codeModel.tokenEmbeddings);
    for (let i = 0; i < this.dimensions; i++) {
      embedding[i] += tokenEmb[i] * weights.token;
    }

    // Combine structural embeddings
    const structEmb = this.averageEmbeddings(structure, this.codeModel.structuralEmbeddings);
    for (let i = 0; i < this.dimensions; i++) {
      embedding[i] += structEmb[i] * weights.structural;
    }

    // Combine semantic embeddings
    const semEmb = this.averageEmbeddings(semantics, this.codeModel.semanticEmbeddings);
    for (let i = 0; i < this.dimensions; i++) {
      embedding[i] += semEmb[i] * weights.semantic;
    }

    // Add feature-based components
    this.addFeatureComponents(embedding, features);

    return embedding;
  }

  private async generateDocEmbedding(tokens: string[]): Promise<Float32Array> {
    if (!this.doc2vecModel) {
      throw new Error('Doc2Vec model not trained');
    }

    return this.averageEmbeddings(tokens, this.doc2vecModel.wordModel.embeddings, this.doc2vecModel.wordModel.vocabulary);
  }

  private generateWordEmbedding(tokens: string[]): Float32Array {
    if (!this.word2vecModel) {
      throw new Error('Word2Vec model not trained');
    }

    return this.averageEmbeddings(tokens, this.word2vecModel.embeddings, this.word2vecModel.vocabulary);
  }

  private generateTfIdfEmbedding(tokens: string[]): Float32Array {
    // Fallback TF-IDF based embedding
    const embedding = new Float32Array(this.dimensions);
    const tokenCounts = new Map<string, number>();

    // Count token frequencies
    for (const token of tokens) {
      tokenCounts.set(token, (tokenCounts.get(token) || 0) + 1);
    }

    // Simple hash-based embedding
    for (const [token, count] of tokenCounts) {
      const hash = this.hashString(token);
      const tf = count / tokens.length;

      for (let i = 0; i < this.dimensions; i++) {
        const index = (hash + i) % this.dimensions;
        embedding[index] += tf;
      }
    }

    return embedding;
  }

  private averageEmbeddings(
    tokens: string[],
    embeddingSource: Map<string, Float32Array> | Float32Array[],
    vocabulary?: Map<string, number>
  ): Float32Array {
    const embedding = new Float32Array(this.dimensions);
    let count = 0;

    for (const token of tokens) {
      let tokenEmb: Float32Array | undefined;

      if (embeddingSource instanceof Map) {
        tokenEmb = embeddingSource.get(token);
      } else if (vocabulary) {
        const index = vocabulary.get(token);
        if (index !== undefined) {
          tokenEmb = embeddingSource[index];
        }
      }

      if (tokenEmb) {
        for (let i = 0; i < this.dimensions; i++) {
          embedding[i] += tokenEmb[i];
        }
        count++;
      }
    }

    // Normalize by count
    if (count > 0) {
      for (let i = 0; i < this.dimensions; i++) {
        embedding[i] /= count;
      }
    }

    return embedding;
  }

  private addFeatureComponents(embedding: Float32Array, features: CombinedFeatures): void {
    // Guard against sparse or empty feature objects (generateQueryEmbedding passes
    // an empty object), otherwise the nested property reads below would throw.
    if (!features || !features.semantic || !features.code || !features.usage) {
      return;
    }

    // Add feature-based components to the last dimensions
    const featureStart = this.dimensions - 20;
    if (featureStart > 0) {
      embedding[featureStart] += features.relevanceScore;
      embedding[featureStart + 1] += features.qualityScore;
      embedding[featureStart + 2] += features.maintainabilityScore;
      embedding[featureStart + 3] += 1 - features.riskScore;
      embedding[featureStart + 4] += features.semantic.purposeClarity;
      embedding[featureStart + 5] += features.semantic.businessRelevance;
      embedding[featureStart + 6] += features.code.maintainabilityIndex / 100;
      embedding[featureStart + 7] += features.usage.totalUsageCount / 100;
    }
  }
  private normalizeEmbedding(embedding: Float32Array): void {
    const magnitude = this.calculateMagnitude(embedding);
    if (magnitude > 0) {
      for (let i = 0; i < embedding.length; i++) {
        embedding[i] /= magnitude;
      }
    }
  }

  private calculateMagnitude(embedding: Float32Array): number {
    let sum = 0;
    for (let i = 0; i < embedding.length; i++) {
      sum += embedding[i] * embedding[i];
    }
    return Math.sqrt(sum);
  }

  private tokenizeQuery(query: string): string[] {
    return query
      .toLowerCase()
      .replace(/[^a-zA-Z0-9\s]/g, ' ')
      .split(/\s+/)
      .filter(token => token.length > 1);
  }

  // Similarity calculation methods

  private cosineSimilarity(vec1: Float32Array, vec2: Float32Array): number {
    if (vec1.length !== vec2.length) return 0;

    let dotProduct = 0;
    let norm1 = 0;
    let norm2 = 0;

    for (let i = 0; i < vec1.length; i++) {
      dotProduct += vec1[i] * vec2[i];
      norm1 += vec1[i] * vec1[i];
      norm2 += vec2[i] * vec2[i];
    }

    const magnitude = Math.sqrt(norm1) * Math.sqrt(norm2);
    return magnitude > 0 ? dotProduct / magnitude : 0;
  }

  private euclideanDistance(vec1: Float32Array, vec2: Float32Array): number {
    if (vec1.length !== vec2.length) return Infinity;

    let sum = 0;
    for (let i = 0; i < vec1.length; i++) {
      const diff = vec1[i] - vec2[i];
      sum += diff * diff;
    }

    return Math.sqrt(sum);
  }

  private manhattanDistance(vec1: Float32Array, vec2: Float32Array): number {
    if (vec1.length !== vec2.length) return Infinity;

    let sum = 0;
    for (let i = 0; i < vec1.length; i++) {
      sum += Math.abs(vec1[i] - vec2[i]);
    }

    return sum;
  }

  private jaccardSimilarity(vec1: number[], vec2: number[]): number {
    const set1 = new Set(vec1.map((v, i) => v > 0 ? i : -1).filter(i => i >= 0));
    const set2 = new Set(vec2.map((v, i) => v > 0 ? i : -1).filter(i => i >= 0));

    const intersection = new Set([...set1].filter(x => set2.has(x)));
    const union = new Set([...set1, ...set2]);

    return union.size > 0 ? intersection.size / union.size : 0;
  }

  private pearsonCorrelation(vec1: Float32Array, vec2: Float32Array): number {
    if (vec1.length !== vec2.length) return 0;

    const n = vec1.length;
    let sum1 = 0, sum2 = 0, sum1Sq = 0, sum2Sq = 0, pSum = 0;

    for (let i = 0; i < n; i++) {
      sum1 += vec1[i];
      sum2 += vec2[i];
      sum1Sq += vec1[i] * vec1[i];
      sum2Sq += vec2[i] * vec2[i];
      pSum += vec1[i] * vec2[i];
    }

    const num = pSum - (sum1 * sum2 / n);
    const den = Math.sqrt((sum1Sq - sum1 * sum1 / n) * (sum2Sq - sum2 * sum2 / n));

    return den > 0 ? num / den : 0;
  }
  // Clustering helper methods

  private initializeCentroids(points: Float32Array[], numClusters: number): Float32Array[] {
    const centroids: Float32Array[] = [];
    const dimensions = points[0].length;

    // K-means++ initialization
    const firstIndex = Math.floor(Math.random() * points.length);
    centroids.push(new Float32Array(points[firstIndex]));

    for (let k = 1; k < numClusters; k++) {
      const distances: number[] = [];
      let totalDistance = 0;

      for (const point of points) {
        let minDistance = Infinity;
        for (const centroid of centroids) {
          const distance = this.euclideanDistance(point, centroid);
          minDistance = Math.min(minDistance, distance);
        }
        distances.push(minDistance * minDistance);
        totalDistance += minDistance * minDistance;
      }

      const random = Math.random() * totalDistance;
      let cumulative = 0;
      for (let i = 0; i < points.length; i++) {
        cumulative += distances[i];
        if (cumulative >= random) {
          centroids.push(new Float32Array(points[i]));
          break;
        }
      }
    }

    return centroids;
  }

  private findNearestCentroid(point: Float32Array, centroids: Float32Array[]): number {
    let minDistance = Infinity;
    let nearestIndex = 0;

    for (let i = 0; i < centroids.length; i++) {
      const distance = this.euclideanDistance(point, centroids[i]);
      if (distance < minDistance) {
        minDistance = distance;
        nearestIndex = i;
      }
    }

    return nearestIndex;
  }

  private updateCentroids(
    points: Float32Array[],
    assignments: number[],
    centroids: Float32Array[]
  ): void {
    const dimensions = points[0].length;

    for (let k = 0; k < centroids.length; k++) {
      const clusterPoints = points.filter((_, i) => assignments[i] === k);

      if (clusterPoints.length > 0) {
        centroids[k].fill(0);

        for (const point of clusterPoints) {
          for (let d = 0; d < dimensions; d++) {
            centroids[k][d] += point[d];
          }
        }

        for (let d = 0; d < dimensions; d++) {
          centroids[k][d] /= clusterPoints.length;
        }
      }
    }
  }

  private calculateCohesion(
    points: Float32Array[],
    assignments: number[],
    clusterIndex: number,
    centroid: Float32Array
  ): number {
    const clusterPoints = points.filter((_, i) => assignments[i] === clusterIndex);
    if (clusterPoints.length === 0) return 0;

    let totalDistance = 0;
    for (const point of clusterPoints) {
      totalDistance += this.euclideanDistance(point, centroid);
    }

    return totalDistance / clusterPoints.length;
  }

  private calculateSeparation(centroids: Float32Array[], clusterIndex: number): number {
    let minDistance = Infinity;

    for (let i = 0; i < centroids.length; i++) {
      if (i !== clusterIndex) {
        const distance = this.euclideanDistance(centroids[clusterIndex], centroids[i]);
        minDistance = Math.min(minDistance, distance);
      }
    }

    return minDistance;
  }

  private calculateSilhouetteScore(
    points: Float32Array[],
    assignments: number[],
    clusterIndex: number
  ): number {
    const clusterPoints = points.filter((_, i) => assignments[i] === clusterIndex);
    if (clusterPoints.length <= 1) return 0;

    let totalScore = 0;
    for (const point of clusterPoints) {
      const a = this.averageIntraClusterDistance(point, clusterPoints);
      const b = this.averageNearestClusterDistance(point, points, assignments, clusterIndex);
      const silhouette = (b - a) / Math.max(a, b);
      totalScore += silhouette;
    }

    return totalScore / clusterPoints.length;
  }

  private averageIntraClusterDistance(point: Float32Array, clusterPoints: Float32Array[]): number {
    if (clusterPoints.length <= 1) return 0;

    let totalDistance = 0;
    let count = 0;

    for (const other of clusterPoints) {
      if (other !== point) {
        totalDistance += this.euclideanDistance(point, other);
        count++;
      }
    }

    return count > 0 ? totalDistance / count : 0;
  }
  private averageNearestClusterDistance(
    point: Float32Array,
    allPoints: Float32Array[],
    assignments: number[],
    currentCluster: number
  ): number {
    const otherClusters = new Set(assignments.filter(a => a !== currentCluster));
    let minAvgDistance = Infinity;

    for (const otherCluster of otherClusters) {
      const otherClusterPoints = allPoints.filter((_, i) => assignments[i] === otherCluster);

      if (otherClusterPoints.length > 0) {
        let totalDistance = 0;
        for (const other of otherClusterPoints) {
          totalDistance += this.euclideanDistance(point, other);
        }
        const avgDistance = totalDistance / otherClusterPoints.length;
        minAvgDistance = Math.min(minAvgDistance, avgDistance);
      }
    }

    return minAvgDistance;
  }

  // Dimensionality reduction methods (simplified implementations)

  private performPCA(
    points: Float32Array[],
    targetDimensions: number
  ): { reducedPoints: Float32Array[]; explainedVariance: number; components: Float32Array[] } {
    // Simplified PCA implementation
    const n = points.length;
    const d = points[0].length;

    // Center the data
    const mean = new Float32Array(d);
    for (const point of points) {
      for (let i = 0; i < d; i++) {
        mean[i] += point[i];
      }
    }
    for (let i = 0; i < d; i++) {
      mean[i] /= n;
    }

    const centeredPoints = points.map(point => {
      const centered = new Float32Array(d);
      for (let i = 0; i < d; i++) {
        centered[i] = point[i] - mean[i];
      }
      return centered;
    });

    // For simplicity, just take the first targetDimensions
    const reducedPoints = centeredPoints.map(point => {
      const reduced = new Float32Array(targetDimensions);
      for (let i = 0; i < targetDimensions; i++) {
        reduced[i] = point[i] || 0;
      }
      return reduced;
    });

    return {
      reducedPoints,
      explainedVariance: 0.8, // Placeholder
      components: [] // Placeholder
    };
  }

  private performTSNE(points: Float32Array[], targetDimensions: number): Float32Array[] {
    // Simplified t-SNE implementation (placeholder)
    return points.map(point => {
      const reduced = new Float32Array(targetDimensions);
      for (let i = 0; i < targetDimensions; i++) {
        reduced[i] = (Math.random() - 0.5) * 2; // Random projection for now
      }
      return reduced;
    });
  }

  private performUMAP(points: Float32Array[], targetDimensions: number): Float32Array[] {
    // Simplified UMAP implementation (placeholder)
    return points.map(point => {
      const reduced = new Float32Array(targetDimensions);
      for (let i = 0; i < targetDimensions; i++) {
        reduced[i] = (Math.random() - 0.5) * 2; // Random projection for now
      }
      return reduced;
    });
  }

  private hashString(str: string): number {
    let hash = 0;
    for (let i = 0; i < str.length; i++) {
      const char = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + char;
      hash = hash & hash; // Convert to 32-bit integer
    }
    return Math.abs(hash);
  }

  /**
   * Clear all caches
   */
  clearCache(): void {
    this.embeddingCache.clear();
    this.similarityCache.clear();
  }

  /**
   * Get embedding statistics
   */
  getEmbeddingStats(): {
    cacheSize: number;
    modelInfo: {
      word2vec: boolean;
      doc2vec: boolean;
      codeModel: boolean;
    };
    dimensions: number;
  } {
    return {
      cacheSize: this.embeddingCache.size,
      modelInfo: {
        word2vec: this.word2vecModel !== null,
        doc2vec: this.doc2vecModel !== null,
        codeModel: this.codeModel !== null
      },
      dimensions: this.dimensions
    };
  }
}
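
Since the class is self-contained, a minimal usage sketch looks roughly like the following. It assumes the file is imported from './EmbeddingGenerator.js' as exported above; the hard-coded token lists and symbol names are illustrative only (in the real server these would come from FeatureExtractor), and only the Word2Vec fallback path is exercised here.

import type { CombinedFeatures } from './FeatureExtractor.js';
import { EmbeddingGenerator } from './EmbeddingGenerator.js';

async function demo(): Promise<void> {
  // Small dimensions/window/minCount so the toy corpus below trains instantly.
  const generator = new EmbeddingGenerator(64, 2, 1);

  // Illustrative pre-tokenized corpus; real token streams come from FeatureExtractor.
  await generator.trainWord2Vec([
    ['parse', 'config', 'file'],
    ['read', 'config', 'file', 'from', 'disk'],
    ['parse', 'json', 'payload'],
  ]);

  // With only the Word2Vec model trained, generateSymbolEmbedding averages word
  // vectors; structure, semantics and features are ignored on this code path.
  const symbolEmbedding = await generator.generateSymbolEmbedding(
    'parseConfig',
    ['parse', 'config', 'file'],
    [],
    [],
    {} as CombinedFeatures
  );
  const queryEmbedding = await generator.generateQueryEmbedding('read config file');

  const metrics = generator.calculateSimilarity(symbolEmbedding, queryEmbedding);
  console.log('cosine similarity:', metrics.cosine.toFixed(3));
  console.log(generator.getEmbeddingStats());
}

demo().catch(console.error);

Persisting and restoring trained models works the same way through exportModels and importModels, which round-trip all three models and their metadata through a single JSON file.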
