Skip to main content
Glama

Jina AI Remote MCP Server

by cedric448
submodular-optimization.ts5.91 kB
// Submodular optimization utilities for string deduplication export function cosineSimilarity(a: number[], b: number[]): number { if (a.length !== b.length) return 0; let dotProduct = 0; let normA = 0; let normB = 0; for (let i = 0; i < a.length; i++) { dotProduct += a[i] * b[i]; normA += a[i] * a[i]; normB += b[i] * b[i]; } if (normA === 0 || normB === 0) return 0; return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB)); } export function computeMarginalGainDiversity( newIdx: number, currentCoverage: number[], similarityMatrix: number[][] ): number { const n = similarityMatrix.length; let marginalGain = 0; const row = similarityMatrix[newIdx]; for (let i = 0; i < n; i++) { const newCoverage = row[i] > currentCoverage[i] ? row[i] : currentCoverage[i]; marginalGain += newCoverage - currentCoverage[i]; } return marginalGain; } export function lazyGreedySelection(embeddings: number[][], k: number): number[] { const n = embeddings.length; if (k >= n) return Array.from({ length: n }, (_, i) => i); const selected: number[] = []; const remaining = new Set(Array.from({ length: n }, (_, i) => i)); // Pre-compute similarity matrix const similarityMatrix: number[][] = []; for (let i = 0; i < n; i++) { similarityMatrix[i] = []; for (let j = 0; j < n; j++) { // Clamp to non-negative to ensure monotone submodularity of facility-location objective const sim = cosineSimilarity(embeddings[i], embeddings[j]); similarityMatrix[i][j] = sim > 0 ? sim : 0; } } // Maintain current coverage vector (max similarity to selected set for each element) const currentCoverage = new Array(n).fill(0); // Priority queue implementation using array (simplified) const pq: Array<[number, number, number]> = []; // Initialize priority queue for (let i = 0; i < n; i++) { const gain = computeMarginalGainDiversity(i, currentCoverage, similarityMatrix); pq.push([-gain, 0, i]); } // Sort by gain (descending) pq.sort((a, b) => a[0] - b[0]); for (let iteration = 0; iteration < k; iteration++) { while (pq.length > 0) { const [negGain, lastUpdated, bestIdx] = pq.shift()!; if (!remaining.has(bestIdx)) continue; if (lastUpdated === iteration) { selected.push(bestIdx); remaining.delete(bestIdx); // Update coverage in O(n) const row = similarityMatrix[bestIdx]; for (let i = 0; i < n; i++) { if (row[i] > currentCoverage[i]) currentCoverage[i] = row[i]; } break; } const currentGain = computeMarginalGainDiversity(bestIdx, currentCoverage, similarityMatrix); pq.push([-currentGain, iteration, bestIdx]); pq.sort((a, b) => a[0] - b[0]); } } return selected; } export function lazyGreedySelectionWithSaturation( embeddings: number[][], threshold: number = 1e-2 ): { selected: number[], optimalK: number, values: number[] } { const n = embeddings.length; const selected: number[] = []; const remaining = new Set(Array.from({ length: n }, (_, i) => i)); const values: number[] = []; // Pre-compute similarity matrix const similarityMatrix: number[][] = []; for (let i = 0; i < n; i++) { similarityMatrix[i] = []; for (let j = 0; j < n; j++) { const sim = cosineSimilarity(embeddings[i], embeddings[j]); similarityMatrix[i][j] = sim > 0 ? sim : 0; } } const currentCoverage = new Array(n).fill(0); // Priority queue implementation using array (simplified) const pq: Array<[number, number, number]> = []; // Initialize priority queue for (let i = 0; i < n; i++) { const gain = computeMarginalGainDiversity(i, currentCoverage, similarityMatrix); pq.push([-gain, 0, i]); } // Sort by gain (descending) pq.sort((a, b) => a[0] - b[0]); let earlyStopK: number | null = null; for (let iteration = 0; iteration < n; iteration++) { while (pq.length > 0) { const [negGain, lastUpdated, bestIdx] = pq.shift()!; if (!remaining.has(bestIdx)) continue; if (lastUpdated === iteration) { selected.push(bestIdx); remaining.delete(bestIdx); // Compute current function value (coverage) const row = similarityMatrix[bestIdx]; for (let i = 0; i < n; i++) { if (row[i] > currentCoverage[i]) currentCoverage[i] = row[i]; } const functionValue = currentCoverage.reduce((sum, val) => sum + val, 0) / n; values.push(functionValue); // Early stop when the marginal gain (delta of normalized objective) falls below threshold if (values.length >= 2) { const delta = values[values.length - 1] - values[values.length - 2]; if (delta < threshold) { earlyStopK = values.length; // k is count of selected items } } break; } const currentGain = computeMarginalGainDiversity(bestIdx, currentCoverage, similarityMatrix); pq.push([-currentGain, iteration, bestIdx]); pq.sort((a, b) => a[0] - b[0]); } if (earlyStopK !== null) break; } // Choose k: prefer early stop detection; otherwise, use all collected values const optimalK = earlyStopK ?? values.length; const finalSelected = selected.slice(0, optimalK); return { selected: finalSelected, optimalK, values }; }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cedric448/jina-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server