folder-mcp

Overview Schema Related Servers Score Discussions

folder-mcp
tests
real-integration

embedding-real.test.ts•11.2 KiB

import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { setupRealTestEnvironment } from '../helpers/real-test-environment';
import { existsSync } from 'fs';
import { promises as fs } from 'fs';
import path from 'path';
import type { RealTestEnvironment } from '../helpers/real-test-environment';
import type { EmbeddingVector } from '../../src/types/index.js';

// Example paragraph for similarity search
const CLIENT_EMAIL_PARAGRAPH = 'Our team is considering a hybrid remote work policy. What are the best practices for flexible hours and international collaboration?';

// Short text for quick embedding tests
const SHORT_TEXT = 'Remote work policy guidelines';

/**
 * Outcome of a guarded embedding call: either the produced embedding or the
 * value that was thrown. Keeping the two apart lets a test assert on each
 * outcome without wrapping its own `expect` calls in a `try` block.
 */
type EmbeddingAttempt =
  | { ok: true; result: EmbeddingVector }
  | { ok: false; error: unknown };

/**
 * Integration tests that drive the embedding service exposed by the
 * environment returned from `setupRealTestEnvironment('embedding-real')`.
 */
describe('Embedding Real Tests - Real Integration Tests', () => {
  let env: RealTestEnvironment;

  /**
   * Calls `generateSingleEmbedding` and captures a thrown value instead of
   * letting it propagate.
   *
   * Only the service call is guarded. Assertions made by the caller therefore
   * run outside any `try`/`catch` and cannot be swallowed by it.
   *
   * @param text - Text to embed.
   * @returns The embedding on success, otherwise the thrown value.
   */
  const attemptEmbedding = async (text: string): Promise<EmbeddingAttempt> => {
    try {
      const result = await env.services.embedding.generateSingleEmbedding(text);
      return { ok: true, result };
    } catch (error: unknown) {
      return { ok: false, error };
    }
  };

  /**
   * Asserts that a thrown value is a meaningful error: an `Error` instance
   * carrying a non-empty message.
   *
   * @param error - The value captured from a rejected embedding call.
   */
  const expectMeaningfulError = (error: unknown): void => {
    expect(error).toBeInstanceOf(Error);
    expect((error as Error).message).toBeTruthy();
  };

  beforeAll(async () => {
    env = await setupRealTestEnvironment('embedding-real');
  });

  afterAll(async () => {
    await env.cleanup();
  });

  it('generates real embeddings for a paragraph and validates vector dimensions', async () => {
    const result = await env.services.embedding.generateSingleEmbedding(CLIENT_EMAIL_PARAGRAPH);

    // Validate embedding structure
    expect(result).toBeDefined();
    expect(result.vector).toBeDefined();
    expect(Array.isArray(result.vector)).toBe(true);
    expect(result.vector.length).toBeGreaterThanOrEqual(384); // Minimum expected dimensions
    expect(result.vector.every((v) => typeof v === 'number')).toBe(true);
    expect(result.dimensions).toBe(result.vector.length);
    expect(typeof result.model).toBe('string');
    expect(result.model.length).toBeGreaterThan(0);
  });

  it('generates embeddings for real document files and validates content similarity', async () => {
    // Test with the TXT file (easiest to read)
    const txtPath = path.join(env.knowledgeBasePath, 'Policies/Remote_Work_Policy.txt');
    const txtContent = await env.services.fileSystem.readFile(txtPath);

    // Generate embeddings for both texts
    const queryEmbedding = await env.services.embedding.generateSingleEmbedding(CLIENT_EMAIL_PARAGRAPH);
    const docEmbedding = await env.services.embedding.generateSingleEmbedding(txtContent);

    // Validate both embeddings have same dimensions
    expect(queryEmbedding.dimensions).toBe(docEmbedding.dimensions);
    expect(queryEmbedding.model).toBe(docEmbedding.model);

    // Calculate similarity using the embedding service
    const similarity = env.services.embedding.calculateSimilarity(queryEmbedding, docEmbedding);

    // Validate similarity score
    expect(typeof similarity).toBe('number');
    expect(similarity).toBeGreaterThanOrEqual(0);
    expect(similarity).toBeLessThanOrEqual(1);

    // Since both texts are about remote work policy, similarity should be meaningful
    expect(similarity).toBeGreaterThan(0.3); // Reasonable threshold for related content
  });

  it('generates consistent embeddings for the same text', async () => {
    // Generate embeddings for the same text multiple times
    const embedding1 = await env.services.embedding.generateSingleEmbedding(SHORT_TEXT);
    const embedding2 = await env.services.embedding.generateSingleEmbedding(SHORT_TEXT);

    // Both runs must agree on shape and model
    expect(embedding1.dimensions).toBe(embedding2.dimensions);
    expect(embedding1.model).toBe(embedding2.model);
    expect(embedding1.vector.length).toBe(embedding2.vector.length);

    // Calculate similarity between identical texts
    const similarity = env.services.embedding.calculateSimilarity(embedding1, embedding2);

    // NOTE(review): this loose bound was originally justified by "mock
    // embeddings are random". If the service under test is deterministic,
    // identical input should score close to 1.0 — confirm and tighten.
    expect(similarity).toBeGreaterThan(0.5);
  });

  it('handles batch embedding generation for real document content', async () => {
    // Create text chunks from real document content
    const txtPath = path.join(env.knowledgeBasePath, 'Policies/Remote_Work_Policy.txt');
    const txtContent = await env.services.fileSystem.readFile(txtPath);

    // Split content into chunks (simulate chunking): blank-line separated
    // paragraphs longer than 20 characters, capped at the first three
    const chunks = txtContent
      .split('\n\n')
      .filter((chunk) => chunk.trim().length > 20)
      .slice(0, 3);
    expect(chunks.length).toBeGreaterThan(0);

    // Generate embeddings for each chunk, one at a time
    const embeddings: EmbeddingVector[] = [];
    for (const chunk of chunks) {
      const embedding = await env.services.embedding.generateSingleEmbedding(chunk.trim());
      embeddings.push(embedding);
    }

    // Validate all embeddings
    expect(embeddings.length).toBe(chunks.length);
    for (const embedding of embeddings) {
      expect(embedding.vector).toBeDefined();
      expect(embedding.vector.length).toBeGreaterThanOrEqual(384);
      expect(embedding.dimensions).toBe(embedding.vector.length);
      expect(typeof embedding.model).toBe('string');
    }

    // All embeddings should have the same dimensions and model
    const firstEmbedding = embeddings[0];
    if (firstEmbedding) {
      for (const embedding of embeddings) {
        expect(embedding.dimensions).toBe(firstEmbedding.dimensions);
        expect(embedding.model).toBe(firstEmbedding.model);
      }
    }
  });

  it('handles different file formats and extracts meaningful embeddings', async () => {
    // Test with different formats that should produce similar embeddings
    const formats = ['txt', 'md']; // Start with text-based formats
    const embeddings: { format: string; embedding: EmbeddingVector }[] = [];

    for (const format of formats) {
      const filePath = path.join(env.knowledgeBasePath, `Policies/Remote_Work_Policy.${format}`);
      const content = await env.services.fileSystem.readFile(filePath);

      // Take first 500 characters to ensure similar content
      const snippet = content.substring(0, 500);
      const embedding = await env.services.embedding.generateSingleEmbedding(snippet);
      embeddings.push({ format, embedding });
    }

    // Validate embeddings were generated
    expect(embeddings.length).toBe(formats.length);

    // Calculate similarity between different formats of the same content
    const [first, second] = embeddings;
    if (first && second) {
      const similarity = env.services.embedding.calculateSimilarity(first.embedding, second.embedding);

      // Same content in different formats should score reasonably high.
      // NOTE(review): the 0.4 bound was originally justified by "mock
      // embeddings are random" — confirm it is still the intended threshold.
      expect(similarity).toBeGreaterThan(0.4);
    }
  });

  it('validates embedding service error handling and graceful degradation', async () => {
    // Test with empty text: the service may either reject with a meaningful
    // error or return an embedding
    const emptyAttempt = await attemptEmbedding('');
    if (emptyAttempt.ok) {
      // Should either handle gracefully or provide a valid embedding
      if (emptyAttempt.result) {
        expect(emptyAttempt.result.vector).toBeDefined();
        expect(Array.isArray(emptyAttempt.result.vector)).toBe(true);
      }
    } else {
      expectMeaningfulError(emptyAttempt.error);
    }

    // Test with very long text (stress test)
    const longText = 'Remote work policy '.repeat(1000);
    const longAttempt = await attemptEmbedding(longText);
    if (longAttempt.ok) {
      // Should handle long text gracefully. These assertions run outside the
      // guarded call, so a malformed result fails the test.
      expect(longAttempt.result.vector).toBeDefined();
      expect(longAttempt.result.vector.length).toBeGreaterThanOrEqual(384);
    } else {
      expectMeaningfulError(longAttempt.error);
    }
  });

  it('should validate cache directory creation for embedding processing', async () => {
    // This test ensures that .folder-mcp cache directories are created for embedding processing
    const cacheDir = path.join(env.knowledgeBasePath, '.folder-mcp');

    // If the cache directory is not there yet, ask the cache service to create it
    if (!existsSync(cacheDir)) {
      await env.services.cache.setupCacheDirectory();
    }

    // Verify cache directory is created
    expect(existsSync(cacheDir)).toBe(true);

    // Create cache subdirectories for embedding processing
    const metadataDir = path.join(cacheDir, 'metadata');
    const embeddingsDir = path.join(cacheDir, 'embeddings');
    const vectorsDir = path.join(cacheDir, 'vectors');
    const cacheSubdirs = [metadataDir, embeddingsDir, vectorsDir];

    // A recursive mkdir succeeds when the directory already exists, so no
    // existence pre-check is needed
    for (const dir of cacheSubdirs) {
      await fs.mkdir(dir, { recursive: true });
    }
    for (const dir of cacheSubdirs) {
      expect(existsSync(dir)).toBe(true);
    }

    // Test cache population by saving embedding data
    const testText = CLIENT_EMAIL_PARAGRAPH;
    const embedding = await env.services.embedding.generateSingleEmbedding(testText);

    // Save embedding to cache
    const cacheKey = 'test-client-email-embedding';
    const embeddingCachePath = path.join(embeddingsDir, `${cacheKey}.json`);
    await fs.writeFile(embeddingCachePath, JSON.stringify(embedding, null, 2));

    // Test vector similarity cache as well
    const shortEmbedding = await env.services.embedding.generateSingleEmbedding(SHORT_TEXT);
    const similarity = env.services.embedding.calculateSimilarity(embedding, shortEmbedding);

    const similarityData = {
      text1: testText,
      text2: SHORT_TEXT,
      similarity: similarity,
      dimensions1: embedding.vector.length,
      dimensions2: shortEmbedding.vector.length,
      calculatedAt: new Date().toISOString()
    };

    // Save similarity data to cache
    const similarityCacheKey = 'test-similarity-calculation';
    const similarityCachePath = path.join(vectorsDir, `${similarityCacheKey}.json`);
    await fs.writeFile(similarityCachePath, JSON.stringify(similarityData, null, 2));

    // Verify cache entries exist
    expect(existsSync(embeddingCachePath)).toBe(true);
    expect(existsSync(similarityCachePath)).toBe(true);

    // Verify cache contents can be loaded
    const cachedEmbedding = JSON.parse(await fs.readFile(embeddingCachePath, 'utf8'));
    const cachedSimilarity = JSON.parse(await fs.readFile(similarityCachePath, 'utf8'));

    expect(cachedEmbedding).toBeTruthy();
    expect(cachedEmbedding).toHaveProperty('vector');
    expect(Array.isArray(cachedEmbedding.vector)).toBe(true);
    expect(cachedEmbedding.vector.length).toBeGreaterThanOrEqual(384);

    expect(cachedSimilarity).toBeTruthy();
    expect(cachedSimilarity).toHaveProperty('similarity');
    expect(typeof cachedSimilarity.similarity).toBe('number');
    expect(cachedSimilarity.similarity).toBeGreaterThanOrEqual(-1);
    expect(cachedSimilarity.similarity).toBeLessThanOrEqual(1);

    console.log(`✅ Cache directory created and validated at: ${cacheDir}`);
    console.log(`✅ Cache populated with embedding data for client email text`);
    console.log(`✅ Cache populated with similarity calculation data`);
    console.log('✅ Embedding processing cache infrastructure is ready');
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/okets/folder-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

embedding-real.test.ts•11.2 KiB