Tea Rags MCP

benchmarks.mjs•15.8 KiB

/**
 * Benchmark Functions
 *
 * All benchmark functions for testing different parameters.
 *
 * Every exported `benchmark*` function resolves to a plain result object and
 * reports failures through its `error` field instead of throwing.
 */

import { randomUUID } from "crypto";

import { config, CRITERIA, EMBEDDING_CALIBRATION, MEDIAN_CODE_CHUNK_SIZE } from "./config.mjs";

// Track all created collections for cleanup
export const createdCollections = new Set();

/**
 * Race a promise against a timer.
 *
 * @param {Promise<T>} promise - Work to wait for
 * @param {number} ms - Time budget in milliseconds
 * @param {string} label - Name of the operation, embedded in the error message
 * @returns {Promise<T>} Settles like `promise` if it settles within `ms`
 * @throws {Error} `Timeout: <label> exceeded <ms>ms` when the timer fires first
 * @template T
 */
export async function withTimeout(promise, ms, label) {
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error(`Timeout: ${label} exceeded ${ms}ms`)), ms);
  });

  try {
    return await Promise.race([promise, timeout]);
  } finally {
    // Disarm the timer once the race is decided; a still-pending timer would
    // keep the event loop alive for up to `ms` after the work has finished.
    clearTimeout(timer);
  }
}

/**
 * Calculate median of an array
 *
 * @param {number[]} arr - Values (not mutated)
 * @returns {number} The median, or 0 for an empty array
 */
export function median(arr) {
  if (arr.length === 0) return 0;
  const sorted = [...arr].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}

/** Reject zero, negative, NaN and missing values, which would make the `i += step` loops below spin forever. */
function assertPositive(value, name) {
  if (!(value > 0)) {
    throw new RangeError(`${name} must be a positive number, got ${String(value)}`);
  }
}

/** Extract a printable message from anything that can be thrown. */
function errorMessage(error) {
  return error instanceof Error ? error.message : String(error);
}

/** Items per second, rounded; a 0 ms measurement is treated as 1 ms so the result stays finite. */
function throughput(count, elapsedMs) {
  if (count === 0) return 0;
  return Math.round((count * 1000) / (elapsedMs > 0 ? elapsedMs : 1));
}

/** Slice `items` into consecutive chunks of at most `batchSize` elements. */
function splitIntoBatches(items, batchSize) {
  const batches = [];
  for (let i = 0; i < items.length; i += batchSize) {
    batches.push(items.slice(i, i + batchSize));
  }
  return batches;
}

/** Timeout for embedding runs: the plateau timeout when given, capped at 4x the test timeout. */
function resolveEmbeddingTimeout(plateauTimeout) {
  const ceiling = CRITERIA.TEST_TIMEOUT_MS * 4;
  return plateauTimeout ? Math.min(plateauTimeout, ceiling) : ceiling;
}

/**
 * Create a uniquely named scratch collection, run `body(collection)` and
 * delete the collection afterwards, whether `body` succeeded or not.
 */
async function runInScratchCollection(qdrant, prefix, body) {
  const collection = `${prefix}_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
  createdCollections.add(collection);
  try {
    await qdrant.createCollection(collection, config.EMBEDDING_DIMENSION, "Cosine");
    return await body(collection);
  } finally {
    try {
      await qdrant.deleteCollection(collection);
      createdCollections.delete(collection);
    } catch {
      // Best effort: the name stays in `createdCollections` so whoever owns
      // that set can retry the cleanup later.
    }
  }
}

/**
 * Upsert `points` in chunks of `batchSize`; only the final chunk is sent with
 * `wait: true`. When `label` is given, each call is bounded by
 * CRITERIA.TEST_TIMEOUT_MS.
 */
async function upsertInBatches(qdrant, collection, points, batchSize, ordering, label) {
  for (let i = 0; i < points.length; i += batchSize) {
    const batch = points.slice(i, i + batchSize);
    const isLast = i + batchSize >= points.length;
    const pending = qdrant.addPointsOptimized(collection, batch, { wait: isLast, ordering });
    await (label ? withTimeout(pending, CRITERIA.TEST_TIMEOUT_MS, label) : pending);
  }
}

/**
 * Generate realistic code chunks for benchmarking
 * Each chunk matches MEDIAN_CODE_CHUNK_SIZE to simulate real GPU load
 *
 * @param {number} count - Number of chunks to generate
 * @returns {string[]} `count` strings, each cut to MEDIAN_CODE_CHUNK_SIZE characters
 */
export function generateTexts(count) {
  const targetSize = MEDIAN_CODE_CHUNK_SIZE;

  return Array.from({ length: count }, (_, i) => {
    // Base code template.
    // NOTE(review): the template text is reproduced exactly as found; its
    // whitespace looks collapsed - confirm the intended layout.
    let chunk = `/** * Module ${i}: Data processing utilities * @module processor_${i} */ import { validateInput, transformData } from './utils'; import { Logger } from '../logging'; const logger = new Logger('processor_${i}'); export class DataProcessor_${i} { constructor(config) { this.config = config; this.cache = new Map(); } async process(data) { logger.debug('Processing batch', { size: data.length }); const validated = data.filter(item => validateInput(item)); const results = validated.map(item => ({ id: item.id * ${i}, value: Math.sqrt(item.value) + ${i % 100}, timestamp: Date.now() })); return results.filter(r => r.value > 0); } `;

    // Pad with realistic code to reach target size
    while (chunk.length < targetSize) {
      const methodNum = Math.floor((chunk.length - 200) / 300);
      chunk += ` async helper_${methodNum}(input) { const processed = transformData(input); this.cache.set(input.id, \nprocessed); return processed; } `;
    }

    chunk += `}\n`;
    return chunk.slice(0, targetSize);
  });
}

/**
 * Benchmark embedding batch size with FIXED sample count
 *
 * KEY PRINCIPLE: Same number of samples for ALL batch sizes.
 * This ensures apples-to-apples comparison.
 *
 * Invalid `batchSize` / `runs` (zero, negative, NaN) are reported through the
 * `error` field instead of looping forever.
 *
 * @param {Object} embeddings - Embeddings client
 * @param {string[]} texts - Test texts (should have FIXED_SAMPLES count)
 * @param {number} batchSize - Batch size to test
 * @param {number} runs - Number of runs (default from config)
 * @param {Object} options - Additional options
 * @param {number} options.plateauTimeout - Max time before degradation (ms)
 * @returns {Promise<{batchSize, time, rate, times[], batches, error, degraded}>}
 *   `time` is the median run time in ms; `degraded` is always a boolean.
 */
export async function benchmarkEmbeddingBatchSize(
  embeddings,
  texts,
  batchSize,
  runs = EMBEDDING_CALIBRATION.RUNS,
  options = {},
) {
  const { plateauTimeout } = options;
  const expectedBatches = batchSize > 0 ? Math.ceil(texts.length / batchSize) : 0;

  try {
    assertPositive(batchSize, "batchSize");
    assertPositive(runs, "runs");

    // EMBEDDING_BATCH_SIZE now controls pipeline accumulator, not Ollama splitting.
    // Benchmark simulates accumulator behavior by splitting texts into batches.
    process.env.EMBEDDING_BATCH_SIZE = String(batchSize);
    process.env.EMBEDDING_CONCURRENCY = "1"; // Isolate batch size variable

    // Use plateau timeout if provided, otherwise default
    const timeout = resolveEmbeddingTimeout(plateauTimeout);

    const times = [];
    for (let i = 0; i < runs; i++) {
      const start = Date.now();

      // Simulate pipeline accumulator: split texts into batches and call embedBatch for each
      const batches = splitIntoBatches(texts, batchSize);

      // NOTE(review): all batches are issued at once here, whereas
      // benchmarkConcurrency with concurrency 1 runs them one by one -
      // confirm this is the intended way to isolate batch size.
      await withTimeout(
        Promise.all(batches.map((batch) => embeddings.embedBatch(batch))),
        timeout,
        `batch size ${batchSize} run ${i + 1}`,
      );

      times.push(Date.now() - start);
    }

    const medianTime = median(times);

    return {
      batchSize,
      time: medianTime,
      rate: throughput(texts.length, medianTime),
      times,
      batches: expectedBatches,
      error: null,
      degraded: false,
    };
  } catch (error) {
    // Check if timeout was due to plateau degradation
    const message = errorMessage(error);
    const isDegradation = Boolean(plateauTimeout) && message.includes("Timeout");

    return {
      batchSize,
      time: 0,
      rate: 0,
      times: [],
      batches: expectedBatches,
      error: isDegradation ? "degradation" : message,
      degraded: isDegradation,
    };
  }
}

/**
 * Benchmark upsert throughput for one batch size in a scratch collection.
 *
 * @param {Object} qdrant - Qdrant manager (createCollection, addPointsOptimized, deleteCollection)
 * @param {Object[]} points - Points to upsert
 * @param {number} batchSize - Batch size to test
 * @returns {Promise<{batchSize, time, rate, error}>} `time` in ms, `rate` in points per second
 */
export async function benchmarkCodeBatchSize(qdrant, points, batchSize) {
  try {
    assertPositive(batchSize, "batchSize");

    const time = await runInScratchCollection(qdrant, "tune_batch", async (collection) => {
      const start = Date.now();
      await upsertInBatches(qdrant, collection, points, batchSize, "weak", `code batch ${batchSize}`);
      return Date.now() - start;
    });

    return { batchSize, time, rate: throughput(points.length, time), error: null };
  } catch (error) {
    return { batchSize, time: 0, rate: 0, error: errorMessage(error) };
  }
}

/**
 * Benchmark embedding concurrency with FIXED sample count
 *
 * KEY PRINCIPLE: Same number of samples for ALL concurrency levels.
 * This ensures apples-to-apples comparison.
 *
 * Concurrency affects HOW batches are processed (parallel vs sequential),
 * not HOW MANY samples we test.
 *
 * The timeout applies to each group of concurrent batches, not to a whole run.
 *
 * @param {Object} embeddings - Embeddings client
 * @param {string[]} texts - Test texts (should have FIXED_SAMPLES count)
 * @param {number} concurrency - Concurrency level to test
 * @param {number} batchSize - Batch size to use
 * @param {number} runs - Number of runs (default from config)
 * @param {Object} options - Additional options
 * @param {number} options.plateauTimeout - Max time before degradation (ms)
 * @returns {Promise<{concurrency, time, rate, times[], batches, groups, error, degraded}>}
 */
export async function benchmarkConcurrency(
  embeddings,
  texts,
  concurrency,
  batchSize,
  runs = EMBEDDING_CALIBRATION.RUNS,
  options = {},
) {
  const { plateauTimeout } = options;
  const expectedBatches = batchSize > 0 ? Math.ceil(texts.length / batchSize) : 0;
  const expectedGroups = concurrency > 0 ? Math.ceil(expectedBatches / concurrency) : 0;

  try {
    assertPositive(concurrency, "concurrency");
    assertPositive(batchSize, "batchSize");
    assertPositive(runs, "runs");

    // EMBEDDING_BATCH_SIZE now controls pipeline accumulator, not Ollama splitting.
    // Benchmark simulates accumulator: split texts, then process with concurrency.
    process.env.EMBEDDING_BATCH_SIZE = String(batchSize);
    process.env.EMBEDDING_CONCURRENCY = String(concurrency);

    // Use plateau timeout if provided, otherwise default
    const timeout = resolveEmbeddingTimeout(plateauTimeout);

    const times = [];
    for (let i = 0; i < runs; i++) {
      const start = Date.now();

      // Simulate pipeline: split into batches, process groups with concurrency
      const batches = splitIntoBatches(texts, batchSize);

      // Process batches in groups of `concurrency`
      for (const group of splitIntoBatches(batches, concurrency)) {
        await withTimeout(
          Promise.all(group.map((batch) => embeddings.embedBatch(batch))),
          timeout,
          `concurrency ${concurrency} run ${i + 1}`,
        );
      }

      times.push(Date.now() - start);
    }

    const medianTime = median(times);

    return {
      concurrency,
      time: medianTime,
      rate: throughput(texts.length, medianTime),
      times,
      batches: expectedBatches,
      groups: expectedGroups,
      error: null,
      degraded: false,
    };
  } catch (error) {
    const message = errorMessage(error);
    const isDegradation = Boolean(plateauTimeout) && message.includes("Timeout");

    return {
      concurrency,
      time: 0,
      rate: 0,
      times: [],
      batches: expectedBatches,
      groups: expectedGroups,
      error: isDegradation ? "degradation" : message,
      degraded: isDegradation,
    };
  }
}

/**
 * Benchmark upsert throughput for one `ordering` value, using a fixed batch
 * size of 256 in a scratch collection.
 *
 * @param {Object} qdrant - Qdrant manager
 * @param {Object[]} points - Points to upsert
 * @param {string} ordering - Ordering value forwarded to addPointsOptimized
 * @returns {Promise<{ordering, time, rate, error}>}
 */
export async function benchmarkOrdering(qdrant, points, ordering) {
  const batchSize = 256;

  try {
    const time = await runInScratchCollection(qdrant, "tune_ord", async (collection) => {
      const start = Date.now();
      await upsertInBatches(qdrant, collection, points, batchSize, ordering, `ordering ${ordering}`);
      return Date.now() - start;
    });

    return { ordering, time, rate: throughput(points.length, time), error: null };
  } catch (error) {
    return { ordering, time: 0, rate: 0, error: errorMessage(error) };
  }
}

/**
 * Benchmark upsert throughput with QDRANT_FLUSH_INTERVAL_MS set to `interval`.
 *
 * The environment variable is left set after the call.
 *
 * @param {Object} qdrant - Qdrant manager
 * @param {Object[]} points - Points to upsert
 * @param {number} interval - Flush interval (ms) written to the environment
 * @param {number} optimalBatchSize - Upsert batch size
 * @param {string} optimalOrdering - Ordering value forwarded to addPointsOptimized
 * @returns {Promise<{interval, time, rate, error}>}
 */
export async function benchmarkFlushInterval(qdrant, points, interval, optimalBatchSize, optimalOrdering) {
  try {
    assertPositive(optimalBatchSize, "optimalBatchSize");

    const time = await runInScratchCollection(qdrant, "tune_flush", async (collection) => {
      process.env.QDRANT_FLUSH_INTERVAL_MS = String(interval);

      const start = Date.now();
      await upsertInBatches(
        qdrant,
        collection,
        points,
        optimalBatchSize,
        optimalOrdering,
        `flush interval ${interval}ms`,
      );
      return Date.now() - start;
    });

    return { interval, time, rate: throughput(points.length, time), error: null };
  } catch (error) {
    return { interval, time: 0, rate: 0, error: errorMessage(error) };
  }
}

/**
 * Benchmark the simulated effect of the batch formation timeout.
 *
 * @param {Object} qdrant - Qdrant manager
 * @param {Object[]} points - Points to upsert
 * @param {number} timeoutMs - Batch formation timeout being simulated (ms)
 * @param {number} optimalBatchSize - Full batch size
 * @param {string} optimalOrdering - Ordering value forwarded to addPointsOptimized
 * @returns {Promise<{timeoutMs, time, rate, effectiveBatchSize, fillRate, error}>}
 *   On failure `effectiveBatchSize` and `fillRate` are 0.
 */
export async function benchmarkBatchFormationTimeout(qdrant, points, timeoutMs, optimalBatchSize, optimalOrdering) {
  try {
    assertPositive(optimalBatchSize, "optimalBatchSize");

    // Simulate batch formation effect:
    // Short timeout → many small partial batches (simulate 30-50% fill rate)
    // Long timeout → fewer full batches (simulate 80-100% fill rate)
    const fillRate = Math.min(1.0, 0.3 + (timeoutMs / 5000) * 0.7);
    const effectiveBatchSize = Math.max(1, Math.round(optimalBatchSize * fillRate));

    const time = await runInScratchCollection(qdrant, "tune_bft", async (collection) => {
      const start = Date.now();
      await upsertInBatches(
        qdrant,
        collection,
        points,
        effectiveBatchSize,
        optimalOrdering,
        `batch formation timeout ${timeoutMs}ms`,
      );
      return Date.now() - start;
    });

    return { timeoutMs, time, rate: throughput(points.length, time), effectiveBatchSize, fillRate, error: null };
  } catch (error) {
    return { timeoutMs, time: 0, rate: 0, effectiveBatchSize: 0, fillRate: 0, error: errorMessage(error) };
  }
}

/**
 * Benchmark sequential deletion throughput for one delete batch size.
 *
 * Points are upserted first (untimed); only the deletion phase is measured.
 *
 * @param {Object} qdrant - Qdrant manager; `qdrant.client.delete` is called directly
 * @param {Object[]} points - Points to upsert and then delete (each needs an `id`)
 * @param {number} deleteBatchSize - Delete batch size to test
 * @param {number} optimalBatchSize - Upsert batch size for the setup phase
 * @param {string} optimalOrdering - Ordering value for the setup phase
 * @returns {Promise<{batchSize, time, rate, error}>}
 */
export async function benchmarkDeleteBatchSize(qdrant, points, deleteBatchSize, optimalBatchSize, optimalOrdering) {
  try {
    assertPositive(deleteBatchSize, "deleteBatchSize");
    assertPositive(optimalBatchSize, "optimalBatchSize");

    const time = await runInScratchCollection(qdrant, "tune_del", async (collection) => {
      // First, add all points
      await upsertInBatches(qdrant, collection, points, optimalBatchSize, optimalOrdering);

      // Now test deletion speed
      const pointIds = points.map((p) => p.id);
      const start = Date.now();
      for (const batch of splitIntoBatches(pointIds, deleteBatchSize)) {
        await withTimeout(
          qdrant.client.delete(collection, { points: batch, wait: true }),
          CRITERIA.TEST_TIMEOUT_MS,
          `delete batch ${deleteBatchSize}`,
        );
      }
      return Date.now() - start;
    });

    return { batchSize: deleteBatchSize, time, rate: throughput(points.length, time), error: null };
  } catch (error) {
    return { batchSize: deleteBatchSize, time: 0, rate: 0, error: errorMessage(error) };
  }
}

/**
 * Benchmark deletion throughput with `concurrency` workers draining a shared
 * queue of delete batches.
 *
 * Each delete call is bounded by CRITERIA.TEST_TIMEOUT_MS, matching
 * benchmarkDeleteBatchSize.
 *
 * @param {Object} qdrant - Qdrant manager; `qdrant.client.delete` is called directly
 * @param {Object[]} points - Points to upsert and then delete (each needs an `id`)
 * @param {number} concurrency - Number of concurrent delete workers
 * @param {number} optimalBatchSize - Upsert batch size for the setup phase
 * @param {string} optimalOrdering - Ordering value for the setup phase
 * @param {number} optimalDeleteBatch - Number of ids per delete call
 * @returns {Promise<{concurrency, time, rate, error}>}
 */
export async function benchmarkDeleteConcurrency(
  qdrant,
  points,
  concurrency,
  optimalBatchSize,
  optimalOrdering,
  optimalDeleteBatch,
) {
  try {
    assertPositive(concurrency, "concurrency");
    assertPositive(optimalBatchSize, "optimalBatchSize");
    assertPositive(optimalDeleteBatch, "optimalDeleteBatch");

    const time = await runInScratchCollection(qdrant, "tune_delc", async (collection) => {
      // First, add all points
      await upsertInBatches(qdrant, collection, points, optimalBatchSize, optimalOrdering);

      // Create batches for deletion
      const pointIds = points.map((p) => p.id);
      const queue = splitIntoBatches(pointIds, optimalDeleteBatch);

      // Test deletion with concurrency
      const start = Date.now();

      // Each worker pulls the next batch until the queue is empty. shift()
      // runs synchronously between awaits, so no batch is taken twice.
      const runOne = async () => {
        while (queue.length > 0) {
          const batch = queue.shift();
          await withTimeout(
            qdrant.client.delete(collection, { points: batch, wait: true }),
            CRITERIA.TEST_TIMEOUT_MS,
            `delete concurrency ${concurrency}`,
          );
        }
      };

      const running = [];
      for (let i = 0; i < concurrency; i++) {
        running.push(runOne());
      }
      await Promise.all(running);

      return Date.now() - start;
    });

    return { concurrency, time, rate: throughput(points.length, time), error: null };
  } catch (error) {
    return { concurrency, time: 0, rate: 0, error: errorMessage(error) };
  }
}

/**
 * Build points from embedding results, pairing each with its source text.
 *
 * @param {Array<{embedding: number[]}>} embeddingResults - One result per text
 * @param {string[]} texts - Source texts, index-aligned with `embeddingResults`
 * @returns {Array<{id: string, vector: number[], payload: {content: string, index: number}}>}
 *   Points with a fresh random UUID each
 */
export function generatePoints(embeddingResults, texts) {
  return embeddingResults.map((r, i) => ({
    id: randomUUID(),
    vector: r.embedding,
    payload: { content: texts[i], index: i },
  }));
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/artk0de/TeaRAGs-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

benchmarks.mjs•15.8 KiB