Skip to main content
Glama
alignBlocks.ts3.44 kB
import { computeJaccardSimilarity } from './computeSimilarity'; import type { AlignmentPair, FingerprintedBlock } from './types'; export const alignEnglishAndFrenchBlocks = ( defaultBlocks: FingerprintedBlock[], secondaryBlocks: FingerprintedBlock[] ): AlignmentPair[] => { // Needleman–Wunsch style global alignment using anchor similarity and type equality const defaultLength = defaultBlocks.length; const secondaryLength = secondaryBlocks.length; const scoreMatrix: number[][] = Array.from( { length: defaultLength + 1 }, () => Array.from({ length: secondaryLength + 1 }, () => 0) ); const traceMatrix: ('diagonal' | 'up' | 'left')[][] = Array.from( { length: defaultLength + 1 }, () => Array.from({ length: secondaryLength + 1 }, () => 'diagonal') ); const gapPenalty = -2; const computeMatchScore = ( defaultIndex: number, secondaryIndex: number ): number => { const defaultBlock = defaultBlocks[defaultIndex]; const secondaryBlock = secondaryBlocks[secondaryIndex]; const typeBonus = defaultBlock.type === secondaryBlock.type ? 2 : 0; const anchorSimilarity = computeJaccardSimilarity( defaultBlock.anchorText, secondaryBlock.anchorText, 3 ); const lengthRatio = Math.min(defaultBlock.content.length, secondaryBlock.content.length) / Math.max(defaultBlock.content.length, secondaryBlock.content.length); const lengthBonus = lengthRatio > 0.75 ? 1 : 0; return typeBonus + lengthBonus + anchorSimilarity * 8; // weighted toward anchor similarity }; // initialize first row and column for (let i = 1; i <= defaultLength; i += 1) { scoreMatrix[i][0] = scoreMatrix[i - 1][0] + gapPenalty; traceMatrix[i][0] = 'up'; } for (let j = 1; j <= secondaryLength; j += 1) { scoreMatrix[0][j] = scoreMatrix[0][j - 1] + gapPenalty; traceMatrix[0][j] = 'left'; } // fill for (let i = 1; i <= defaultLength; i += 1) { for (let j = 1; j <= secondaryLength; j += 1) { const match = scoreMatrix[i - 1][j - 1] + computeMatchScore(i - 1, j - 1); const deleteGap = scoreMatrix[i - 1][j] + gapPenalty; const insertGap = scoreMatrix[i][j - 1] + gapPenalty; const best = Math.max(match, deleteGap, insertGap); scoreMatrix[i][j] = best; traceMatrix[i][j] = best === match ? 'diagonal' : best === deleteGap ? 'up' : 'left'; } } // traceback const result: AlignmentPair[] = []; let i = defaultLength; let j = secondaryLength; while (i > 0 || j > 0) { if (i > 0 && j > 0 && traceMatrix[i][j] === 'diagonal') { const englishIndex = i - 1; const frenchIndex = j - 1; const similarityScore = computeJaccardSimilarity( defaultBlocks[englishIndex].anchorText, secondaryBlocks[frenchIndex].anchorText, 3 ); result.unshift({ englishIndex, frenchIndex, similarityScore }); i -= 1; j -= 1; } else if (i > 0 && (j === 0 || traceMatrix[i][j] === 'up')) { result.unshift({ englishIndex: i - 1, frenchIndex: null, similarityScore: 0, }); i -= 1; } else if (j > 0 && (i === 0 || traceMatrix[i][j] === 'left')) { // french block has no corresponding english block (deleted) result.unshift({ englishIndex: -1, frenchIndex: j - 1, similarityScore: 0, }); j -= 1; } } return result; };

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/aymericzip/intlayer'

If you have feedback or need assistance with the MCP directory API, please join our Discord server