deduplicate_images
Identify and remove visually similar images to extract a diverse subset using semantic analysis and optimization techniques.
Instructions
Get top-k semantically unique images (URLs or base64-encoded) using Jina CLIP v2 embeddings and submodular optimization. Use this when you have many visually similar images and want the most diverse subset.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| images | Yes | Array of image inputs to deduplicate. Each item can be either an HTTP(S) URL or a raw base64-encoded image string (without data URI prefix). | |
| k | No | Number of unique images to return. If not provided, the optimal k is found automatically by detecting diminishing returns. | |
Implementation Reference
// src/tools/jina-tools.ts:655-785 (handler)
// Main execution logic: validates input, fetches CLIP embeddings from the Jina AI API, selects a
// diverse subset using submodular optimization, then downloads the selected images and returns
// them as base64 image content items.
//
// Parameters:
//   images - image inputs; each item is either an HTTP(S) URL or a raw base64 string.
//   k      - optional number of images to keep; when omitted the saturation-based selector
//            chooses the count.
// Returns a `{ content, isError? }` result object. Validation and API failures are reported as
// text content with `isError: true`; per-image download failures are reported as text items
// alongside the images that did succeed.
// NOTE(review): getProps, checkBearerToken and handleApiError are defined elsewhere in the file.
async ({ images, k }: { images: string[]; k?: number }) => {
  try {
    const props = getProps();

    const tokenError = checkBearerToken(props.bearerToken);
    if (tokenError) {
      return tokenError;
    }

    if (images.length === 0) {
      return {
        content: [
          {
            type: "text" as const,
            text: "No images provided for deduplication",
          },
        ],
        isError: true,
      };
    }

    // A fractional k would make the selector loop run ceil(k) times, so reject it up front.
    if (k !== undefined && (!Number.isInteger(k) || k <= 0 || k > images.length)) {
      return {
        content: [
          {
            type: "text" as const,
            text: `Invalid k value: ${k}. Must be an integer between 1 and ${images.length}`,
          },
        ],
        isError: true,
      };
    }

    // Prepare input for image embeddings API
    const embeddingInput = images.map((img) => ({ image: img }));

    // Get image embeddings from Jina API using CLIP v2
    const response = await fetch('https://api.jina.ai/v1/embeddings', {
      method: 'POST',
      headers: {
        'Accept': 'application/json',
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${props.bearerToken}`,
      },
      body: JSON.stringify({
        model: 'jina-clip-v2',
        input: embeddingInput,
      }),
    });

    if (!response.ok) {
      return handleApiError(response, "Getting image embeddings");
    }

    const data = (await response.json()) as { data?: Array<{ embedding: number[] }> };

    // Selected indices are used to index `images`, so the API must return exactly one
    // embedding per input image.
    if (!Array.isArray(data.data) || data.data.length !== images.length) {
      return {
        content: [
          {
            type: "text" as const,
            text: "Invalid response format from embeddings API",
          },
        ],
        isError: true,
      };
    }

    // Extract embeddings
    const embeddings = data.data.map((item) => item.embedding);

    // Use submodular optimization to select diverse images
    const selectedIndices: number[] =
      k !== undefined
        ? lazyGreedySelection(embeddings, k)
        : lazyGreedySelectionWithSaturation(embeddings).selected;

    // Get the selected images
    const selectedImages = selectedIndices.map((idx) => ({ index: idx, source: images[idx] }));

    const contentItems: Array<
      { type: 'image'; data: string; mimeType?: string } | { type: 'text'; text: string }
    > = [];

    for (const { index, source } of selectedImages) {
      try {
        if (/^https?:\/\//i.test(source)) {
          // Try to leverage Cloudflare Image Resizing to transcode to PNG when available.
          // `cf` is not part of the standard RequestInit type, hence the cast.
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          let imgResp = await fetch(source, { cf: { image: { format: 'png' } } } as any);
          if (!imgResp.ok) {
            // Fallback to plain fetch if resizing is not available
            imgResp = await fetch(source);
          }
          if (!imgResp.ok) {
            contentItems.push({
              type: 'text',
              text: `Failed to download image at index ${index}: HTTP ${imgResp.status}`,
            });
            continue;
          }

          const arrayBuf = await imgResp.arrayBuffer();
          const base64Data = Buffer.from(arrayBuf).toString('base64');

          // The bytes are only PNG when the transcode actually happened; otherwise they are in
          // the source's own format. Trust the response Content-Type when it names an image
          // type and keep 'image/png' as the fallback label.
          const contentType = imgResp.headers.get('content-type')?.split(';')[0]?.trim().toLowerCase();
          const mimeType = contentType?.startsWith('image/') ? contentType : 'image/png';

          contentItems.push({ type: 'image', data: base64Data, mimeType });
        } else {
          // Treat as raw base64 without data URI; return as PNG by contract
          contentItems.push({ type: 'image', data: source, mimeType: 'image/png' });
        }
      } catch (e) {
        contentItems.push({
          type: 'text',
          text: `Error processing image at index ${index}: ${e instanceof Error ? e.message : String(e)}`,
        });
      }
    }

    return {
      content: contentItems.length > 0
        ? contentItems
        : [{ type: 'text' as const, text: 'No images to return' }],
    };
  } catch (error) {
    return {
      content: [
        {
          type: "text" as const,
          text: `Error: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
},
- src/tools/jina-tools.ts:651-654 (schema)Zod input schema defining 'images' array of strings (URLs or base64) and optional 'k' number.{ images: z.array(z.string()).describe("Array of image inputs to deduplicate. Each item can be either an HTTP(S) URL or a raw base64-encoded image string (without data URI prefix)."), k: z.number().optional().describe("Number of unique images to return. If not provided, automatically finds optimal k by looking at diminishing return"), },
- src/tools/jina-tools.ts:649-649 (registration) Registration of the tool via server.tool() call, including name, description, schema, and handler: "deduplicate_images",
/**
 * Submodular greedy selection for fixed k: selects k most diverse embeddings based on cosine
 * similarity coverage.
 *
 * Candidates are kept in a sorted array of `[-gain, lastUpdatedIteration, index]` tuples. A
 * candidate is accepted only when its gain was computed during the current iteration; a stale
 * entry is re-scored against the current coverage and re-queued instead.
 *
 * @param embeddings One vector per item.
 * @param k Number of items to select. When `k` is at least the number of embeddings, every
 *   index is returned in input order without running the selection.
 * @returns Indices of the selected embeddings, in the order they were picked.
 */
export function lazyGreedySelection(embeddings: number[][], k: number): number[] {
  const total = embeddings.length;
  if (k >= total) {
    return Array.from({ length: total }, (_, idx) => idx);
  }

  // Pairwise similarities; non-positive values are clamped to 0 to ensure monotone
  // submodularity of the facility-location objective.
  const sims: number[][] = embeddings.map((rowVec) =>
    embeddings.map((colVec) => {
      const s = cosineSimilarity(rowVec, colVec);
      return s > 0 ? s : 0;
    })
  );

  // coverage[i] = highest similarity between item i and anything picked so far.
  const coverage = new Array(total).fill(0);

  const byNegGain = (
    left: [number, number, number],
    right: [number, number, number]
  ): number => left[0] - right[0];

  // Array-backed priority queue, kept ordered by descending gain (ascending negated gain).
  const queue: Array<[number, number, number]> = embeddings.map(
    (_, idx): [number, number, number] => [
      -computeMarginalGainDiversity(idx, coverage, sims),
      0,
      idx,
    ]
  );
  queue.sort(byNegGain);

  const picked: number[] = [];
  const unpicked = new Set<number>(embeddings.keys());

  for (let round = 0; round < k; round++) {
    let entry: [number, number, number] | undefined;
    while ((entry = queue.shift()) !== undefined) {
      const [, stamp, candidate] = entry;
      if (!unpicked.has(candidate)) {
        continue;
      }

      if (stamp !== round) {
        // Gain is stale: re-score against the current coverage and put it back in the queue.
        queue.push([-computeMarginalGainDiversity(candidate, coverage, sims), round, candidate]);
        queue.sort(byNegGain);
        continue;
      }

      // Gain is fresh for this round, so the head of the queue is the pick.
      picked.push(candidate);
      unpicked.delete(candidate);
      sims[candidate].forEach((s, i) => {
        if (s > coverage[i]) coverage[i] = s;
      });
      break;
    }
  }

  return picked;
}
/**
 * Variant that automatically determines optimal k by detecting saturation in marginal gains.
 *
 * Items are picked greedily (lazy re-evaluation of stale gains) and, after each pick, the
 * normalized objective `sum(currentCoverage) / n` is recorded. As soon as a pick raises that
 * objective by less than `threshold`, selection stops and that last pick is discarded: it is
 * the first item that added (almost) nothing beyond what was already selected, e.g. a
 * near-duplicate.
 *
 * @param embeddings One vector per item.
 * @param threshold Minimum increase of the normalized objective a pick must achieve to be kept.
 * @returns `selected` - indices kept, in pick order; `optimalK` - `selected.length`;
 *   `values` - normalized objective after every evaluated pick (this includes the discarded
 *   saturating pick, so it can be one longer than `selected`).
 */
export function lazyGreedySelectionWithSaturation(
  embeddings: number[][],
  threshold: number = 1e-2
): { selected: number[], optimalK: number, values: number[] } {
  const n = embeddings.length;
  const selected: number[] = [];
  const remaining = new Set(Array.from({ length: n }, (_, i) => i));
  const values: number[] = [];

  // Pre-compute similarity matrix (non-positive similarities are clamped to 0)
  const similarityMatrix: number[][] = [];
  for (let i = 0; i < n; i++) {
    similarityMatrix[i] = [];
    for (let j = 0; j < n; j++) {
      const sim = cosineSimilarity(embeddings[i], embeddings[j]);
      similarityMatrix[i][j] = sim > 0 ? sim : 0;
    }
  }

  // Max similarity to the selected set for each element
  const currentCoverage = new Array(n).fill(0);

  // Priority queue implementation using array (simplified): [-gain, lastUpdatedIteration, index]
  const pq: Array<[number, number, number]> = [];

  // Initialize priority queue
  for (let i = 0; i < n; i++) {
    const gain = computeMarginalGainDiversity(i, currentCoverage, similarityMatrix);
    pq.push([-gain, 0, i]);
  }

  // Sort by gain (descending)
  pq.sort((a, b) => a[0] - b[0]);

  let earlyStopK: number | null = null;

  for (let iteration = 0; iteration < n; iteration++) {
    while (pq.length > 0) {
      const [, lastUpdated, bestIdx] = pq.shift()!;

      if (!remaining.has(bestIdx)) continue;

      if (lastUpdated === iteration) {
        selected.push(bestIdx);
        remaining.delete(bestIdx);

        // Update coverage, then compute current function value (normalized coverage)
        const row = similarityMatrix[bestIdx];
        for (let i = 0; i < n; i++) {
          if (row[i] > currentCoverage[i]) currentCoverage[i] = row[i];
        }
        const functionValue = currentCoverage.reduce((sum, val) => sum + val, 0) / n;
        values.push(functionValue);

        // Early stop when the marginal gain (delta of normalized objective) falls below threshold
        if (values.length >= 2) {
          const delta = values[values.length - 1] - values[values.length - 2];
          if (delta < threshold) {
            // The pick that just saturated contributed less than `threshold`, so it is
            // excluded: keep only the picks made before it.
            earlyStopK = values.length - 1;
          }
        }
        break;
      }

      // Stale entry: re-score against the current coverage and re-queue
      const currentGain = computeMarginalGainDiversity(bestIdx, currentCoverage, similarityMatrix);
      pq.push([-currentGain, iteration, bestIdx]);
      pq.sort((a, b) => a[0] - b[0]);
    }

    if (earlyStopK !== null) break;
  }

  // Choose k: prefer early stop detection; otherwise, keep everything that was selected
  const optimalK = earlyStopK ?? values.length;
  const finalSelected = selected.slice(0, optimalK);

  return { selected: finalSelected, optimalK, values };
}