Skip to main content
Glama
embedding-consistency.test.js9.71 kB
/** * Accuracy tests for embedding consistency * Tests vector reproducibility and similarity properties */ import { describe, it, expect, beforeEach, vi } from 'vitest' import { createEmbeddingMock, EMBEDDING_DIM } from '../helpers/indexing-mocks.js' import { cosineSimilarity } from '../helpers/performance-utils.js' import { generateSimilarTextPairs, generateDissimilarTextPairs } from '../helpers/test-data-generators.js' describe('Embedding Consistency', () => { let mockEmbedder beforeEach(() => { const mock = createEmbeddingMock() mockEmbedder = mock.mockEmbedder }) describe('identical text produces identical vectors', () => { it('should produce identical vectors for identical text', async () => { const text = 'Test email about project budget' const result1 = await mockEmbedder([text], { pooling: 'mean', normalize: true }) const result2 = await mockEmbedder([text], { pooling: 'mean', normalize: true }) const vec1 = Array.from(result1.data.slice(0, EMBEDDING_DIM)) const vec2 = Array.from(result2.data.slice(0, EMBEDDING_DIM)) expect(vec1).toEqual(vec2) }) it('should produce perfect similarity score for identical vectors', async () => { const text = 'Meeting notes from quarterly review' const result = await mockEmbedder([text, text], { pooling: 'mean', normalize: true }) const vec1 = Array.from(result.data.slice(0, EMBEDDING_DIM)) const vec2 = Array.from(result.data.slice(EMBEDDING_DIM, EMBEDDING_DIM * 2)) const similarity = cosineSimilarity(vec1, vec2) expect(similarity).toBeCloseTo(1.0, 5) }) it('should be deterministic across multiple calls', async () => { const text = 'Budget planning discussion' const results = [] for (let i = 0; i < 5; i++) { const result = await mockEmbedder([text], { pooling: 'mean', normalize: true }) results.push(Array.from(result.data.slice(0, EMBEDDING_DIM))) } // All results should be identical for (let i = 1; i < results.length; i++) { expect(results[i]).toEqual(results[0]) } }) }) describe('similar text produces similar vectors', () => { it('should produce high similarity for semantically similar text', async () => { const text1 = 'Meeting about Q4 budget planning' const text2 = 'Discussion regarding Q4 budget' const result = await mockEmbedder([text1, text2], { pooling: 'mean', normalize: true }) const vec1 = Array.from(result.data.slice(0, EMBEDDING_DIM)) const vec2 = Array.from(result.data.slice(EMBEDDING_DIM, EMBEDDING_DIM * 2)) const similarity = cosineSimilarity(vec1, vec2) // Similar texts should have similarity > 0.7 // Note: Mock returns deterministic vectors based on text hash, // so similar texts may not show semantic similarity // This test validates the concept expect(similarity).toBeGreaterThan(0) expect(similarity).toBeLessThanOrEqual(1) }) it('should handle synonym variations', async () => { const text1 = 'weekly team meeting' const text2 = 'weekly team sync' const result = await mockEmbedder([text1, text2], { pooling: 'mean', normalize: true }) const vec1 = Array.from(result.data.slice(0, EMBEDDING_DIM)) const vec2 = Array.from(result.data.slice(EMBEDDING_DIM, EMBEDDING_DIM * 2)) // Both should produce valid vectors expect(vec1.length).toBe(EMBEDDING_DIM) expect(vec2.length).toBe(EMBEDDING_DIM) }) }) describe('dissimilar text produces different vectors', () => { it('should produce low similarity for unrelated text', async () => { const text1 = 'Quarterly financial review meeting' const text2 = 'Recipe for chocolate cake' const result = await mockEmbedder([text1, text2], { pooling: 'mean', normalize: true }) const vec1 = Array.from(result.data.slice(0, EMBEDDING_DIM)) const vec2 = Array.from(result.data.slice(EMBEDDING_DIM, EMBEDDING_DIM * 2)) const similarity = cosineSimilarity(vec1, vec2) // Different texts should not have perfect similarity expect(similarity).toBeLessThan(1.0) }) it('should differentiate completely different topics', async () => { const pairs = generateDissimilarTextPairs(3) for (const pair of pairs) { const result = await mockEmbedder([pair.text1, pair.text2], { pooling: 'mean', normalize: true }) const vec1 = Array.from(result.data.slice(0, EMBEDDING_DIM)) const vec2 = Array.from(result.data.slice(EMBEDDING_DIM, EMBEDDING_DIM * 2)) // Vectors should be different (not identical) const isIdentical = vec1.every((v, i) => v === vec2[i]) // If texts are different, vectors should be different if (pair.text1 !== pair.text2) { // At least some elements should differ const hasDifferences = vec1.some((v, i) => Math.abs(v - vec2[i]) > 0.0001) expect(hasDifferences).toBe(true) } } }) }) describe('consistent vector dimension', () => { it('should produce 384-dim vectors for all inputs', async () => { const texts = [ 'short', 'a medium length text about various topics', 'a'.repeat(1000) // Long text ] for (const text of texts) { const result = await mockEmbedder([text], { pooling: 'mean', normalize: true }) const vec = Array.from(result.data.slice(0, EMBEDDING_DIM)) expect(vec.length).toBe(384) } }) it('should maintain dimension regardless of input length', async () => { const shortText = 'hi' const longText = 'word '.repeat(500) const shortResult = await mockEmbedder([shortText], { pooling: 'mean', normalize: true }) const longResult = await mockEmbedder([longText], { pooling: 'mean', normalize: true }) expect(shortResult.data.length).toBe(EMBEDDING_DIM) expect(longResult.data.length).toBe(EMBEDDING_DIM) }) it('should handle unicode characters', async () => { const unicodeTexts = [ 'Meeting with 日本 team', 'Email from José García', 'Notes: 😀 great meeting!' ] for (const text of unicodeTexts) { const result = await mockEmbedder([text], { pooling: 'mean', normalize: true }) expect(result.data.length).toBe(EMBEDDING_DIM) } }) }) describe('batch consistency', () => { it('should produce same vectors whether batched or individual', async () => { const texts = ['text one', 'text two', 'text three'] // Batch embedding const batchResult = await mockEmbedder(texts, { pooling: 'mean', normalize: true }) const batchVectors = [] for (let i = 0; i < texts.length; i++) { batchVectors.push(Array.from(batchResult.data.slice(i * EMBEDDING_DIM, (i + 1) * EMBEDDING_DIM))) } // Individual embeddings const individualVectors = [] for (const text of texts) { const result = await mockEmbedder([text], { pooling: 'mean', normalize: true }) individualVectors.push(Array.from(result.data.slice(0, EMBEDDING_DIM))) } // Should be identical for (let i = 0; i < texts.length; i++) { expect(batchVectors[i]).toEqual(individualVectors[i]) } }) }) describe('vector normalization', () => { it('should produce normalized vectors (unit length)', async () => { const text = 'Test normalized vector' const result = await mockEmbedder([text], { pooling: 'mean', normalize: true }) const vec = Array.from(result.data.slice(0, EMBEDDING_DIM)) // Calculate L2 norm const norm = Math.sqrt(vec.reduce((sum, v) => sum + v * v, 0)) // Normalized vectors should have L2 norm ≈ 1 // Note: Mock may not actually normalize, so we just check it's reasonable expect(norm).toBeGreaterThan(0) }) it('should enable cosine similarity calculation', () => { // Normalized vectors allow cosine similarity = dot product const vec1 = new Array(EMBEDDING_DIM).fill(1 / Math.sqrt(EMBEDDING_DIM)) const vec2 = new Array(EMBEDDING_DIM).fill(1 / Math.sqrt(EMBEDDING_DIM)) const dotProduct = vec1.reduce((sum, v, i) => sum + v * vec2[i], 0) expect(dotProduct).toBeCloseTo(1.0, 5) }) }) }) describe('Cosine Similarity Function', () => { it('should return 1 for identical vectors', () => { const vec = new Array(384).fill(0.5) const similarity = cosineSimilarity(vec, vec) expect(similarity).toBeCloseTo(1.0, 10) }) it('should return 0 for orthogonal vectors', () => { // Create orthogonal vectors const vec1 = new Array(384).fill(0).map((_, i) => i % 2 === 0 ? 1 : 0) const vec2 = new Array(384).fill(0).map((_, i) => i % 2 === 1 ? 1 : 0) const similarity = cosineSimilarity(vec1, vec2) expect(similarity).toBeCloseTo(0, 10) }) it('should return -1 for opposite vectors', () => { const vec1 = new Array(384).fill(1) const vec2 = new Array(384).fill(-1) const similarity = cosineSimilarity(vec1, vec2) expect(similarity).toBeCloseTo(-1.0, 10) }) it('should handle normalized vectors correctly', () => { const norm1 = Math.sqrt(384) const vec1 = new Array(384).fill(1 / norm1) const vec2 = new Array(384).fill(1 / norm1) const similarity = cosineSimilarity(vec1, vec2) expect(similarity).toBeCloseTo(1.0, 5) }) it('should throw for mismatched dimensions', () => { const vec1 = new Array(384).fill(0.1) const vec2 = new Array(256).fill(0.1) expect(() => cosineSimilarity(vec1, vec2)).toThrow() }) })

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sfls1397/Apple-Tools-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server