translator-ai

by DatanoiseTV
index.ts (41.2 kB)
#!/usr/bin/env node

import { Command } from 'commander';
import { promises as fs } from 'fs';
import path from 'path';
import { GoogleGenerativeAI, HarmCategory, HarmBlockThreshold } from '@google/generative-ai';
import ora, { Ora } from 'ora';
import dotenv from 'dotenv';
import { performance } from 'perf_hooks';
import { glob } from 'glob';
import {
  hashString,
  getCacheDirectory,
  getDefaultCacheFilePath,
  flattenObjectWithPaths,
  unflattenObject,
  JsonObject,
  JsonValue,
  preserveFormats,
  restoreFormats,
  PreservedFormat,
  sortObjectKeys,
  compareKeys
} from './helpers';
import { TranslatorFactory, TranslatorType } from './translators/factory';
import { TranslationProvider } from './translators/base';

dotenv.config();

// --- TYPE DEFINITIONS ---

interface PerformanceStats {
  readAndParseTime: number;
  stringCollectionTime: number;
  totalTranslationTime: number;
  rebuildTime: number;
  fileWriteTime: number;
  totalTime: number;
  batchTimes: number[];
  cacheHits: number;
  newStrings: number;
  prunedStrings: number;
  batchCount: number;
  targetBatchSize: number;
}

interface MultiFileStats extends PerformanceStats {
  totalFiles: number;
  totalStrings: number;
  uniqueStrings: number;
  savedApiCalls: number;
  deduplicationSavings: number;
}

type CacheEntry = { [lang: string]: { [hash: string]: string } };
type TranslationCache = { [sourceFilePath: string]: CacheEntry };

// --- CONSTANTS ---

const MAX_BATCH_SIZE = 100;
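// Illustrative cache file layout (shape only; the actual hashes come from
// hashString in ./helpers):
// {
//   "/abs/path/strings.json": {
//     "fr": { "<hash of 'Hello'>": "Bonjour" }
//   }
// }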
// --- HELPER FUNCTIONS ---

async function loadCache(cacheFilePath: string): Promise<TranslationCache> {
  try {
    await fs.access(cacheFilePath);
    const data = await fs.readFile(cacheFilePath, 'utf-8');
    return JSON.parse(data);
  } catch {
    return {};
  }
}

async function ensureCacheDirectory(cacheFilePath: string): Promise<void> {
  const dir = path.dirname(cacheFilePath);
  try {
    await fs.access(dir);
  } catch {
    await fs.mkdir(dir, { recursive: true });
  }
}

async function saveCache(cache: TranslationCache, cacheFilePath: string): Promise<void> {
  await ensureCacheDirectory(cacheFilePath);
  await fs.writeFile(cacheFilePath, JSON.stringify(cache, null, 2), 'utf-8');
}

async function translateBatch(
  strings: string[],
  targetLang: string,
  translator: TranslationProvider,
  sourceLang: string = 'English',
  shouldPreserveFormats: boolean = false
): Promise<Map<string, string>> {
  try {
    let processedStrings = strings;
    const formatMaps = new Map<string, PreservedFormat>();

    // If format preservation is enabled, process strings first
    if (shouldPreserveFormats) {
      processedStrings = strings.map(str => {
        const preserved = preserveFormats(str);
        formatMaps.set(str, preserved);
        return preserved.processed;
      });
    }

    const translations = await translator.translate(processedStrings, targetLang, sourceLang);
    const translationMap = new Map<string, string>();

    strings.forEach((original, index) => {
      if (translations[index]) {
        let translated = translations[index];

        // Restore preserved formats if enabled
        if (shouldPreserveFormats && formatMaps.has(original)) {
          translated = restoreFormats(translated, formatMaps.get(original)!);
        }

        translationMap.set(original, translated);
      }
    });

    return translationMap;
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error(`\nTranslation error: ${errorMessage}`);
    return new Map();
  }
}

// Legacy function for backward compatibility with Gemini direct usage
async function translateBatchLegacy(stringBatch: { [key: string]: string }, targetLang: string, model: any): Promise<Map<string, string>> {
  const prompt = `You are a machine translation service.
Your task is to translate the values of the given JSON object from English to the language with the code "${targetLang}".

RULES:
- ONLY return a single, valid JSON object.
- The returned JSON object MUST have the exact same keys as the input object.
- Do not include any other text, greetings, explanations, or markdown formatting like \`\`\`json.
- If a string cannot be translated, return the original English string for that key.

EXAMPLE:
Input:
{ "key_0": "Hello World", "key_1": "This is a test." }
Output for target language "fr":
{ "key_0": "Bonjour le monde", "key_1": "Ceci est un test." }

Translate this JSON:
${JSON.stringify(stringBatch, null, 2)}`;

  try {
    const result = await model.generateContent(prompt);
    const responseText = result.response.text();

    try {
      const parsed = JSON.parse(responseText);
      const translationMap = new Map<string, string>();
      for (const key in stringBatch) {
        if (parsed[key]) {
          translationMap.set(stringBatch[key], parsed[key]);
        }
      }
      return translationMap;
    } catch (initialError) {
      const jsonStart = responseText.indexOf('{');
      const jsonEnd = responseText.lastIndexOf('}');
      if (jsonStart !== -1 && jsonEnd !== -1 && jsonEnd > jsonStart) {
        const jsonString = responseText.substring(jsonStart, jsonEnd + 1);
        try {
          const parsed = JSON.parse(jsonString);
          const translationMap = new Map<string, string>();
          for (const key in stringBatch) {
            if (parsed[key]) {
              translationMap.set(stringBatch[key], parsed[key]);
            }
          }
          return translationMap;
        } catch (secondaryError) {
          console.error(`\nError: Failed to parse surgically-extracted JSON. Raw response was:`, responseText);
          return new Map();
        }
      } else {
        console.error(`\nError: Could not find a valid JSON structure in the API response. Raw response was:`, responseText);
        return new Map();
      }
    }
  } catch (apiError) {
    const errorMessage = apiError instanceof Error ? apiError.message : String(apiError);
    console.error(`\nFatal API Error processing a batch. Details: ${errorMessage}`);
    return new Map();
  }
}

function printStats(stats: PerformanceStats | MultiFileStats, spinner: Ora, isCacheEnabled: boolean) {
  spinner.info('--- Translation Statistics ---');
  const formatMs = (ms: number) => `${ms.toFixed(2)}ms`;
  const batchTimes = stats.batchTimes.filter(t => t > 0);
  const avgBatchTime = batchTimes.length > 0
    ? batchTimes.reduce((a, b) => a + b, 0) / batchTimes.length
    : 0;

  // Check if it's multi-file stats
  const isMultiFile = 'totalFiles' in stats;

  if (isMultiFile) {
    console.log(` - Files:
   - Total processed: ${stats.totalFiles}
   - Total strings: ${stats.totalStrings}
   - Unique strings: ${stats.uniqueStrings}
   - Deduplication savings: ${stats.deduplicationSavings} strings (${((stats.deduplicationSavings / stats.totalStrings) * 100).toFixed(1)}%)`);
  }

  console.log(` - File I/O:
   - Reading & Parsing: ${formatMs(stats.readAndParseTime)}
   - Writing File(s): ${formatMs(stats.fileWriteTime)}
 - Processing:
   - String Collection: ${formatMs(stats.stringCollectionTime)}
   - JSON Rebuilding: ${formatMs(stats.rebuildTime)}`);

  if (isCacheEnabled) {
    console.log(` - Caching & Sync:
   - Strings from Cache: ${stats.cacheHits}
   - New Strings to API: ${stats.newStrings}
   - Stale Strings Pruned: ${stats.prunedStrings}`);
  }

  console.log(` - Translation:
   - Batches Sent to API: ${stats.batchCount} (target size: ~${stats.targetBatchSize})
   - Total API Time: ${formatMs(stats.totalTranslationTime)}`);

  if (isMultiFile && stats.savedApiCalls > 0) {
    console.log(`   - API Calls Saved: ${stats.savedApiCalls} (by deduplication)`);
  }

  if (batchTimes.length > 0) {
    console.log(` - Batch Times:
   - Fastest: ${formatMs(Math.min(...batchTimes))}
   - Slowest: ${formatMs(Math.max(...batchTimes))}
   - Average: ${formatMs(avgBatchTime)}`);
  }

  console.log(`\n - Total Execution Time: ${formatMs(stats.totalTime)}`);
}
// --- SINGLE FILE PROCESSING ---

async function processSingleFile(
  inputFile: string,
  lang: string,
  output: string | undefined,
  stdout: boolean,
  showStats: boolean,
  useCache: boolean,
  cacheFile: string,
  translator: TranslationProvider,
  detectSource: boolean = false,
  dryRun: boolean = false,
  preserveFormats: boolean = false,
  includeMetadata: boolean = false,
  sortKeys: boolean = false,
  checkKeys: boolean = false
): Promise<void> {
  const t = { start: performance.now(), last: performance.now() };
  const stats: PerformanceStats = {
    readAndParseTime: 0,
    stringCollectionTime: 0,
    totalTranslationTime: 0,
    rebuildTime: 0,
    fileWriteTime: 0,
    totalTime: 0,
    batchTimes: [],
    cacheHits: 0,
    newStrings: 0,
    prunedStrings: 0,
    batchCount: 0,
    targetBatchSize: 0,
  };

  const spinner = ora({ text: 'Initializing...', isSilent: stdout }).start();
  const sourceFilePath = path.resolve(inputFile);

  let sourceJson: JsonObject;
  try {
    sourceJson = JSON.parse(await fs.readFile(sourceFilePath, 'utf-8'));
    stats.readAndParseTime = performance.now() - t.last;
    t.last = performance.now();
  } catch (e) {
    spinner.fail(`Failed to read or parse input file: ${inputFile}`);
    process.exit(1);
  }

  // 1. FLATTEN SOURCE TO PATHS
  spinner.text = 'Analyzing source file structure...';
  const sourcePathMap = flattenObjectWithPaths(sourceJson);
  const allSourceStrings = new Set(sourcePathMap.values());
  stats.stringCollectionTime = performance.now() - t.last;
  t.last = performance.now();
  // 1.5. DETECT SOURCE LANGUAGE IF REQUESTED
  let sourceLang = 'English';
  if (detectSource && translator.detectLanguage && allSourceStrings.size > 0) {
    spinner.text = 'Detecting source language...';
    const stringsArray = Array.from(allSourceStrings);
    sourceLang = await translator.detectLanguage(stringsArray);
    spinner.info(`Detected source language: ${sourceLang}`);
  }

  // Handle dry-run mode
  if (dryRun) {
    spinner.info('--- DRY RUN MODE ---');
    console.log(`\nSource file: ${inputFile}`);
    console.log(`Total strings: ${allSourceStrings.size}`);
    console.log(`Source language: ${sourceLang}`);
    console.log(`Target language: ${lang}`);
    console.log(`Format preservation: ${preserveFormats ? 'enabled' : 'disabled'}`);

    if (useCache) {
      const resolvedCacheFile = path.isAbsolute(cacheFile) ? cacheFile : path.resolve(cacheFile);
      const translationCache = await loadCache(resolvedCacheFile);
      const langCache = translationCache[sourceFilePath]?.[lang] || {};
      let cacheHits = 0;
      let newStrings = 0;

      allSourceStrings.forEach(str => {
        const h = hashString(str);
        if (langCache[h]) {
          cacheHits++;
        } else {
          newStrings++;
        }
      });

      console.log(`\nCache statistics:`);
      console.log(`  - Cache file: ${resolvedCacheFile}`);
      console.log(`  - Cached translations: ${cacheHits}`);
      console.log(`  - New strings to translate: ${newStrings}`);
      if (newStrings > 0) {
        const numBatches = Math.ceil(newStrings / MAX_BATCH_SIZE);
        console.log(`  - Estimated API calls: ${numBatches}`);
      }
    } else {
      const numBatches = Math.ceil(allSourceStrings.size / MAX_BATCH_SIZE);
      console.log(`\nCache disabled. All strings would be translated.`);
      console.log(`Estimated API calls: ${numBatches}`);
    }

    const finalOutput = output ? output.replace(/\{lang\}/g, lang) : 'stdout';
    console.log(`\nOutput would be written to: ${finalOutput}`);
    spinner.succeed('Dry run complete. No API calls were made.');
    return;
  }

  // 2. COMPARE WITH CACHE
  let translationCache: TranslationCache = {};
  const translations = new Map<string, string>();
  let stringsToTranslateAPI = new Set<string>();
  const resolvedCacheFile = path.isAbsolute(cacheFile) ? cacheFile : path.resolve(cacheFile);

  if (useCache) {
    spinner.text = `Loading translation cache from ${resolvedCacheFile}...`;
    translationCache = await loadCache(resolvedCacheFile);
    const langCache = translationCache[sourceFilePath]?.[lang] || {};

    allSourceStrings.forEach(str => {
      const h = hashString(str);
      if (langCache[h]) {
        translations.set(h, langCache[h]);
      } else {
        stringsToTranslateAPI.add(str);
      }
    });

    stats.cacheHits = allSourceStrings.size - stringsToTranslateAPI.size;
    stats.newStrings = stringsToTranslateAPI.size;
    spinner.info(`Cache found for '${lang}': ${stats.cacheHits} strings loaded, ${stats.newStrings} new strings to translate.`);
  } else {
    spinner.info('Cache is disabled. All strings will be sent for translation.');
    stringsToTranslateAPI = allSourceStrings;
    stats.newStrings = allSourceStrings.size;
  }
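  // Batch sizing note (illustrative numbers): with 250 new strings and
  // MAX_BATCH_SIZE = 100, numBatches = ceil(250 / 100) = 3 and
  // dynamicBatchSize = ceil(250 / 3) = 84, yielding evenly sized batches of
  // 84/84/82 rather than 100/100/50.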
  // 3. TRANSLATE NEW STRINGS
  let successfullyTranslatedCount = 0;
  if (stringsToTranslateAPI.size > 0) {
    const stringsArray = Array.from(stringsToTranslateAPI);
    const totalStrings = stringsArray.length;
    const numBatches = Math.ceil(totalStrings / MAX_BATCH_SIZE);
    const dynamicBatchSize = Math.ceil(totalStrings / numBatches);
    stats.batchCount = numBatches;
    stats.targetBatchSize = dynamicBatchSize;

    spinner.start(`Translating ${stats.newStrings} new strings in ${stats.batchCount} dynamically sized batches (target size: ~${dynamicBatchSize}) using ${translator.name}...`);

    const batches: string[][] = [];
    for (let i = 0; i < totalStrings; i += dynamicBatchSize) {
      batches.push(stringsArray.slice(i, i + dynamicBatchSize));
    }

    const translationPromises = batches.map(async (batch, i) => {
      const batchStartTime = performance.now();
      const result = await translateBatch(batch, lang, translator, sourceLang, preserveFormats);
      stats.batchTimes[i] = performance.now() - batchStartTime;
      spinner.info(`Batch ${i + 1}/${batches.length} (${batch.length} strings) completed in ${stats.batchTimes[i].toFixed(2)}ms.`);
      return result;
    });

    const newTranslations = await Promise.all(translationPromises);
    stats.totalTranslationTime = performance.now() - t.last;
    t.last = performance.now();

    newTranslations.forEach(batchMap => {
      batchMap.forEach((translated, original) => {
        const h = hashString(original);
        translations.set(h, translated);
        successfullyTranslatedCount++;
        if (useCache) {
          if (!translationCache[sourceFilePath]) translationCache[sourceFilePath] = {};
          if (!translationCache[sourceFilePath][lang]) translationCache[sourceFilePath][lang] = {};
          translationCache[sourceFilePath][lang][h] = translated;
        }
      });
    });

    spinner.succeed(`Translation complete. Received ${successfullyTranslatedCount} new translations.`);
  } else if (useCache) {
    spinner.succeed('All translations were found in the cache. No API calls needed.');
  }

  // 4. PRUNE CACHE
  let prunedCount = 0;
  if (useCache && translationCache[sourceFilePath]?.[lang]) {
    const currentHashes = new Set(Array.from(allSourceStrings).map(hashString));
    for (const h in translationCache[sourceFilePath][lang]) {
      if (!currentHashes.has(h)) {
        delete translationCache[sourceFilePath][lang][h];
        prunedCount++;
      }
    }
    if (prunedCount > 0) {
      spinner.info(`Pruned ${prunedCount} stale translations from the cache.`);
    }
  }
  stats.prunedStrings = prunedCount;

  // 5. SAVE CACHE
  if (useCache && (successfullyTranslatedCount > 0 || prunedCount > 0)) {
    await saveCache(translationCache, resolvedCacheFile);
    spinner.info(`Cache saved to ${resolvedCacheFile}.`);
  }
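  // Path encoding note: flattened paths join segments with '\x00' and escape
  // literal dots inside keys as '\x01', so object keys containing '.' cannot
  // collide with nested paths. The encoding below must mirror the one used by
  // flattenObjectWithPaths in ./helpers, since lookups go through sourcePathMap.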
  // 6. REBUILD FROM PATHS
  spinner.start('Rebuilding translated JSON structure from paths...');

  // Start with a deep copy of the original to preserve all keys
  let translatedJson = JSON.parse(JSON.stringify(sourceJson)) as JsonObject;
  const PATH_SEPARATOR = '\x00';
  const DOT_ESCAPE = '\x01';

  // Apply translations by updating the values in place
  function applyTranslations(obj: any, path: string[] = []): void {
    if (typeof obj === 'object' && obj !== null && !Array.isArray(obj)) {
      for (const key in obj) {
        if (Object.prototype.hasOwnProperty.call(obj, key)) {
          applyTranslations(obj[key], [...path, key]);
        }
      }
    } else if (Array.isArray(obj)) {
      obj.forEach((item, index) => {
        applyTranslations(item, [...path, `[${index}]`]);
      });
    } else if (typeof obj === 'string') {
      // Build the flattened path
      const flatPath = path.map(p => p.replace(/\./g, DOT_ESCAPE)).join(PATH_SEPARATOR);
      if (sourcePathMap.has(flatPath)) {
        const sourceString = sourcePathMap.get(flatPath)!;
        const h = hashString(sourceString);
        const translatedString = translations.get(h);
        if (translatedString) {
          // Update the value in the parent object
          const parentPath = path.slice(0, -1);
          const lastKey = path[path.length - 1];
          let parent = translatedJson;
          for (const p of parentPath) {
            if (p.startsWith('[') && p.endsWith(']')) {
              parent = parent[parseInt(p.slice(1, -1))] as any;
            } else {
              parent = parent[p] as any;
            }
          }
          if (lastKey.startsWith('[') && lastKey.endsWith(']')) {
            parent[parseInt(lastKey.slice(1, -1))] = translatedString;
          } else {
            parent[lastKey] = translatedString;
          }
        }
      }
    }
  }

  applyTranslations(translatedJson);

  // Apply sorting if requested
  if (sortKeys) {
    translatedJson = sortObjectKeys(translatedJson) as JsonObject;
  }
  // Add metadata under an underscore-prefixed "_translator_metadata" key
  let outputJson = translatedJson;
  if (includeMetadata) {
    outputJson = {
      "_translator_metadata": {
        "tool": "translator-ai v1.1.0",
        "repository": "https://github.com/DatanoiseTV/translator-ai",
        "provider": translator.name,
        "source_language": detectSource ? sourceLang : "English",
        "target_language": lang,
        "timestamp": new Date().toISOString(),
        "total_strings": sourcePathMap.size,
        "source_file": path.basename(inputFile)
      },
      ...translatedJson
    };
  }

  const outputString = JSON.stringify(outputJson, null, 2);
  stats.rebuildTime = performance.now() - t.last;
  t.last = performance.now();
  spinner.succeed('Rebuilding complete.');

  // --- FINAL OUTPUT ---
  if (stdout) {
    console.log(outputString);
  } else if (output) {
    const finalOutput = output.replace(/\{lang\}/g, lang);
    await fs.writeFile(finalOutput, outputString, 'utf-8');
    stats.fileWriteTime = performance.now() - t.last;
    spinner.succeed(`Successfully created translation file at ${finalOutput}`);
  }

  // --- KEY CHECKING ---
  if (checkKeys) {
    const comparison = compareKeys(sourceJson, outputJson);
    if (!comparison.isValid) {
      spinner.fail(`Key verification failed!`);
      console.error(`\nMissing keys (${comparison.missingKeys.size}):`);
      for (const key of comparison.missingKeys) {
        console.error(`  - ${key}`);
      }
      if (comparison.extraKeys.size > 0) {
        console.error(`\nUnexpected extra keys (${comparison.extraKeys.size}):`);
        for (const key of comparison.extraKeys) {
          console.error(`  + ${key}`);
        }
      }
      process.exit(1);
    } else {
      spinner.succeed(`Key verification passed: all ${comparison.sourceKeys.size} keys are present in output.`);
    }
  }

  // --- STATS ---
  if (showStats && !stdout) {
    stats.totalTime = performance.now() - t.start;
    printStats(stats, spinner, useCache);
  }
}

// --- MULTIPLE FILES PROCESSING ---

async function processMultipleFiles(
  inputPatterns: string[],
  lang: string,
  outputPattern: string | undefined,
  stdout: boolean,
  showStats: boolean,
  useCache: boolean,
  cacheFile: string,
  translator: TranslationProvider,
  detectSource: boolean = false,
  dryRun: boolean = false,
  preserveFormats: boolean = false,
  includeMetadata: boolean = false,
  sortKeys: boolean = false,
  checkKeys: boolean = false
): Promise<void> {
  const t = { start: performance.now(), last: performance.now() };
  const stats: MultiFileStats = {
    readAndParseTime: 0,
    stringCollectionTime: 0,
    totalTranslationTime: 0,
    rebuildTime: 0,
    fileWriteTime: 0,
    totalTime: 0,
    batchTimes: [],
    cacheHits: 0,
    newStrings: 0,
    prunedStrings: 0,
    batchCount: 0,
    targetBatchSize: 0,
    totalFiles: 0,
    totalStrings: 0,
    uniqueStrings: 0,
    savedApiCalls: 0,
    deduplicationSavings: 0
  };

  const spinner = ora({ text: 'Initializing...', isSilent: stdout }).start();

  // 1. COLLECT ALL FILES
  spinner.text = 'Collecting files...';
  const allFiles: string[] = [];
  for (const pattern of inputPatterns) {
    if (pattern.includes('*') || pattern.includes('?') || pattern.includes('[')) {
      // It's a glob pattern
      const files = await glob(pattern, { absolute: true });
      allFiles.push(...files);
    } else {
      // It's a direct file path
      allFiles.push(path.resolve(pattern));
    }
  }

  // Remove duplicates
  const uniqueFiles = [...new Set(allFiles)];
  stats.totalFiles = uniqueFiles.length;

  if (stats.totalFiles === 0) {
    spinner.fail('No files found matching the input patterns.');
    process.exit(1);
  }

  spinner.info(`Found ${stats.totalFiles} file(s) to process.`);
  // 2. READ AND COLLECT ALL UNIQUE STRINGS
  spinner.text = 'Reading and analyzing files...';
  const fileDataMap = new Map<string, { json: JsonObject, paths: Map<string, string>, outputPath: string }>();
  const globalStringMap = new Map<string, Set<string>>(); // string -> set of files containing it

  for (const filePath of uniqueFiles) {
    try {
      const sourceJson = JSON.parse(await fs.readFile(filePath, 'utf-8'));
      const pathMap = flattenObjectWithPaths(sourceJson);

      // Determine output path
      let outputPath: string;
      if (stdout) {
        outputPath = 'stdout';
      } else if (outputPattern) {
        const dir = path.dirname(filePath);
        const base = path.basename(filePath, '.json');
        outputPath = outputPattern
          .replace(/\{dir\}/g, dir)
          .replace(/\{name\}/g, base)
          .replace(/\{lang\}/g, lang);
      } else {
        // Default: same directory with language suffix
        const dir = path.dirname(filePath);
        const base = path.basename(filePath, '.json');
        outputPath = path.join(dir, `${base}.${lang}.json`);
      }

      fileDataMap.set(filePath, { json: sourceJson, paths: pathMap, outputPath });

      // Collect unique strings
      for (const str of pathMap.values()) {
        if (!globalStringMap.has(str)) {
          globalStringMap.set(str, new Set());
        }
        globalStringMap.get(str)!.add(filePath);
        stats.totalStrings++;
      }
    } catch (e) {
      spinner.fail(`Failed to read or parse file: ${filePath}`);
      console.error(e);
    }
  }

  stats.readAndParseTime = performance.now() - t.last;
  t.last = performance.now();
  stats.uniqueStrings = globalStringMap.size;
  stats.deduplicationSavings = stats.totalStrings - stats.uniqueStrings;

  spinner.succeed(`Analyzed ${stats.totalFiles} files: ${stats.totalStrings} total strings, ${stats.uniqueStrings} unique.`);

  // 2.5. DETECT SOURCE LANGUAGE IF REQUESTED
  let sourceLang = 'English';
  if (detectSource && translator.detectLanguage && stats.uniqueStrings > 0) {
    spinner.text = 'Detecting source language...';
    const sampleStrings = Array.from(globalStringMap.keys()).slice(0, 10);
    sourceLang = await translator.detectLanguage(sampleStrings);
    spinner.info(`Detected source language: ${sourceLang}`);
  }

  // Handle dry-run mode for multiple files
  if (dryRun) {
    spinner.info('--- DRY RUN MODE ---');
    console.log(`\nFiles to process: ${stats.totalFiles}`);
    console.log(`Total strings: ${stats.totalStrings}`);
    console.log(`Unique strings: ${stats.uniqueStrings}`);
    console.log(`Deduplication savings: ${stats.deduplicationSavings} strings (${((stats.deduplicationSavings / stats.totalStrings) * 100).toFixed(1)}%)`);
    console.log(`Source language: ${sourceLang}`);
    console.log(`Target language: ${lang}`);
    console.log(`Format preservation: ${preserveFormats ? 'enabled' : 'disabled'}`);

    if (useCache) {
      const resolvedCacheFile = path.isAbsolute(cacheFile) ? cacheFile : path.resolve(cacheFile);
      const translationCache = await loadCache(resolvedCacheFile);
      let totalCacheHits = 0;
      let totalNewStrings = 0;

      for (const [str, files] of globalStringMap) {
        const hash = hashString(str);
        let foundInCache = false;
        for (const file of files) {
          if (translationCache[file]?.[lang]?.[hash]) {
            foundInCache = true;
            break;
          }
        }
        if (foundInCache) {
          totalCacheHits++;
        } else {
          totalNewStrings++;
        }
      }

      console.log(`\nCache statistics:`);
      console.log(`  - Cache file: ${resolvedCacheFile}`);
      console.log(`  - Cached translations: ${totalCacheHits}`);
      console.log(`  - New strings to translate: ${totalNewStrings}`);
      if (totalNewStrings > 0) {
        const numBatches = Math.ceil(totalNewStrings / MAX_BATCH_SIZE);
        console.log(`  - Estimated API calls: ${numBatches}`);
        const callsWithoutDedup = Math.ceil(stats.totalStrings / MAX_BATCH_SIZE);
        const savedCalls = Math.max(0, callsWithoutDedup - numBatches);
        if (savedCalls > 0) {
          console.log(`  - API calls saved by deduplication: ${savedCalls}`);
        }
      }
    } else {
      const numBatches = Math.ceil(stats.uniqueStrings / MAX_BATCH_SIZE);
      console.log(`\nCache disabled. All unique strings would be translated.`);
      console.log(`Estimated API calls: ${numBatches}`);
      const callsWithoutDedup = Math.ceil(stats.totalStrings / MAX_BATCH_SIZE);
      const savedCalls = Math.max(0, callsWithoutDedup - numBatches);
      if (savedCalls > 0) {
        console.log(`API calls saved by deduplication: ${savedCalls}`);
      }
    }

    console.log(`\nOutput files:`);
    for (const [filePath, fileData] of fileDataMap) {
      console.log(`  - ${path.relative(process.cwd(), filePath)} → ${stdout ? 'stdout' : path.relative(process.cwd(), fileData.outputPath)}`);
    }

    spinner.succeed('Dry run complete. No API calls were made.');
    return;
  }
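  // Cache lookup note: a string cached under *any* of the files that contain it
  // counts as a hit for all of them, so strings duplicated across files are
  // only ever sent to the API once.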
  // 3. LOAD CACHE AND DETERMINE WHAT NEEDS TRANSLATION
  const resolvedCacheFile = path.isAbsolute(cacheFile) ? cacheFile : path.resolve(cacheFile);
  let translationCache: TranslationCache = {};
  const globalTranslations = new Map<string, string>(); // hash -> translation
  const stringsToTranslate = new Set<string>();

  if (useCache) {
    spinner.text = `Loading translation cache...`;
    translationCache = await loadCache(resolvedCacheFile);

    // Check cache for each unique string
    for (const [str, files] of globalStringMap) {
      const hash = hashString(str);
      let foundInCache = false;

      // Check if this string is cached for any of the files
      for (const file of files) {
        if (translationCache[file]?.[lang]?.[hash]) {
          globalTranslations.set(hash, translationCache[file][lang][hash]);
          foundInCache = true;
          stats.cacheHits++;
          break;
        }
      }

      if (!foundInCache) {
        stringsToTranslate.add(str);
      }
    }

    spinner.info(`Cache: ${stats.cacheHits} translations found, ${stringsToTranslate.size} new strings to translate.`);
  } else {
    for (const str of globalStringMap.keys()) {
      stringsToTranslate.add(str);
    }
  }

  stats.stringCollectionTime = performance.now() - t.last;
  t.last = performance.now();
  stats.newStrings = stringsToTranslate.size;
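  // Note: unlike processSingleFile, which fires its batches in parallel via
  // Promise.all, the multi-file path below sends batches sequentially.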
  // 4. TRANSLATE NEW STRINGS IN BATCHES
  if (stringsToTranslate.size > 0) {
    const stringsArray = Array.from(stringsToTranslate);
    const numBatches = Math.ceil(stringsArray.length / MAX_BATCH_SIZE);
    stats.batchCount = numBatches;
    stats.targetBatchSize = Math.min(MAX_BATCH_SIZE, Math.ceil(stringsArray.length / numBatches));

    spinner.start(`Translating ${stringsToTranslate.size} unique strings in ${numBatches} batches using ${translator.name}...`);

    let translatedCount = 0;
    for (let i = 0; i < numBatches; i++) {
      const batchStart = i * MAX_BATCH_SIZE;
      const batchEnd = Math.min(batchStart + MAX_BATCH_SIZE, stringsArray.length);
      const batchStrings = stringsArray.slice(batchStart, batchEnd);

      spinner.text = `Translating batch ${i + 1}/${numBatches} (${batchStrings.length} strings)...`;
      const batchStartTime = performance.now();
      const batchTranslations = await translateBatch(batchStrings, lang, translator, sourceLang, preserveFormats);
      stats.batchTimes.push(performance.now() - batchStartTime);

      batchTranslations.forEach((translated, original) => {
        const hash = hashString(original);
        globalTranslations.set(hash, translated);
        translatedCount++;
      });

      spinner.info(`Batch ${i + 1}/${numBatches} completed in ${stats.batchTimes[stats.batchTimes.length - 1].toFixed(2)}ms.`);
    }

    stats.totalTranslationTime = performance.now() - t.last;
    t.last = performance.now();
    spinner.succeed(`Translation complete. ${translatedCount} strings translated in ${stats.batchCount} API calls.`);
  } else {
    spinner.succeed('All translations found in cache. No API calls needed.');
  }

  // Calculate saved API calls
  if (stats.totalFiles > 1 && stats.deduplicationSavings > 0) {
    // Without deduplication, we would need more API calls
    const callsWithoutDedup = Math.ceil(stats.totalStrings / MAX_BATCH_SIZE);
    const actualCalls = stats.batchCount;
    stats.savedApiCalls = Math.max(0, callsWithoutDedup - actualCalls);
  }

  // 5. UPDATE CACHE WITH NEW TRANSLATIONS
  if (useCache && stringsToTranslate.size > 0) {
    // Update cache for all files that contain the newly translated strings
    for (const [str, files] of globalStringMap) {
      const hash = hashString(str);
      if (globalTranslations.has(hash)) {
        const translation = globalTranslations.get(hash)!;
        for (const file of files) {
          if (!translationCache[file]) translationCache[file] = {};
          if (!translationCache[file][lang]) translationCache[file][lang] = {};
          translationCache[file][lang][hash] = translation;
        }
      }
    }
    await saveCache(translationCache, resolvedCacheFile);
    spinner.info(`Cache updated.`);
  }
  // 6. REBUILD AND WRITE OUTPUT FILES
  spinner.start('Creating translated files...');
  let filesWritten = 0;

  for (const [filePath, fileData] of fileDataMap) {
    // Start with a deep copy of the original to preserve all keys
    let translatedJson = JSON.parse(JSON.stringify(fileData.json)) as JsonObject;
    const PATH_SEPARATOR = '\x00';
    const DOT_ESCAPE = '\x01';

    // Apply translations by updating the values in place
    function applyTranslations(obj: any, path: string[] = []): void {
      if (typeof obj === 'object' && obj !== null && !Array.isArray(obj)) {
        for (const key in obj) {
          if (Object.prototype.hasOwnProperty.call(obj, key)) {
            applyTranslations(obj[key], [...path, key]);
          }
        }
      } else if (Array.isArray(obj)) {
        obj.forEach((item, index) => {
          applyTranslations(item, [...path, `[${index}]`]);
        });
      } else if (typeof obj === 'string') {
        // Build the flattened path
        const flatPath = path.map(p => p.replace(/\./g, DOT_ESCAPE)).join(PATH_SEPARATOR);
        if (fileData.paths.has(flatPath)) {
          const sourceString = fileData.paths.get(flatPath)!;
          const hash = hashString(sourceString);
          const translatedString = globalTranslations.get(hash);
          if (translatedString) {
            // Update the value in the parent object
            const parentPath = path.slice(0, -1);
            const lastKey = path[path.length - 1];
            let parent: any = translatedJson;
            for (const p of parentPath) {
              if (p.startsWith('[') && p.endsWith(']')) {
                parent = parent[parseInt(p.slice(1, -1))];
              } else {
                parent = parent[p];
              }
            }
            if (lastKey.startsWith('[') && lastKey.endsWith(']')) {
              parent[parseInt(lastKey.slice(1, -1))] = translatedString;
            } else {
              parent[lastKey] = translatedString;
            }
          }
        }
      }
    }

    applyTranslations(translatedJson);

    // Apply sorting if requested
    if (sortKeys) {
      translatedJson = sortObjectKeys(translatedJson) as JsonObject;
    }

    // Add metadata under an underscore-prefixed "_translator_metadata" key
    let outputJson = translatedJson;
    if (includeMetadata) {
      outputJson = {
        "_translator_metadata": {
          "tool": "translator-ai v1.1.0",
          "repository": "https://github.com/DatanoiseTV/translator-ai",
          "provider": translator.name,
          "source_language": detectSource ? sourceLang : "English",
          "target_language": lang,
          "timestamp": new Date().toISOString(),
          "total_strings": fileData.paths.size,
          "source_file": path.basename(filePath)
        },
        ...translatedJson
      };
    }

    const outputString = JSON.stringify(outputJson, null, 2);

    if (stdout) {
      console.log(`\n=== ${path.basename(filePath)} ===`);
      console.log(outputString);
    } else {
      const outputDir = path.dirname(fileData.outputPath);
      await fs.mkdir(outputDir, { recursive: true });
      await fs.writeFile(fileData.outputPath, outputString, 'utf-8');
      filesWritten++;
    }
  }

  stats.rebuildTime = performance.now() - t.last;
  t.last = performance.now();
  stats.fileWriteTime = performance.now() - t.last;

  if (!stdout) {
    spinner.succeed(`Created ${filesWritten} translated file(s).`);
  }
  // 7. KEY CHECKING
  if (checkKeys && !stdout) {
    spinner.start('Verifying keys...');
    let allValid = true;
    const failedFiles: string[] = [];

    for (const [filePath, fileData] of fileDataMap) {
      const sourceJson = fileData.json;
      // Read the output file to check
      const outputContent = await fs.readFile(fileData.outputPath, 'utf-8');
      const outputJson = JSON.parse(outputContent);

      const comparison = compareKeys(sourceJson, outputJson);
      if (!comparison.isValid) {
        allValid = false;
        failedFiles.push(filePath);
        console.error(`\n${path.basename(filePath)}: Missing keys (${comparison.missingKeys.size}):`);
        for (const key of comparison.missingKeys) {
          console.error(`  - ${key}`);
        }
        if (comparison.extraKeys.size > 0) {
          console.error(`  Extra keys (${comparison.extraKeys.size}):`);
          for (const key of comparison.extraKeys) {
            console.error(`  + ${key}`);
          }
        }
      }
    }

    if (!allValid) {
      spinner.fail(`Key verification failed for ${failedFiles.length} file(s)!`);
      process.exit(1);
    } else {
      spinner.succeed(`Key verification passed: all files have correct keys.`);
    }
  }

  // 8. SHOW STATISTICS
  if (showStats && !stdout) {
    stats.totalTime = performance.now() - t.start;
    printStats(stats, spinner, useCache);
  }
}

// --- MAIN CLI LOGIC ---

async function main() {
  const program = new Command();

  // Check if --list-providers is in args before setting up the full parser
  if (process.argv.includes('--list-providers')) {
    console.log('Checking available translation providers...\n');
    const providers = await TranslatorFactory.listAvailableProviders();
    if (providers.length > 0) {
      console.log('Available providers:');
      providers.forEach(p => console.log(`  - ${p}`));
    } else {
      console.log('No translation providers available.');
      console.log('\nTo use Gemini: Set GEMINI_API_KEY environment variable');
      console.log('To use Ollama: Install and run Ollama, then: ollama pull deepseek-r1:latest');
    }
    process.exit(0);
  }
  program
    .version('1.1.0')
    .description('Translate JSON i18n files efficiently with caching and deduplication.')
    .argument('<inputFiles...>', 'Path(s) to source JSON file(s) or glob patterns')
    .requiredOption('-l, --lang <langCodes>', 'Target language code(s), comma-separated for multiple')
    .option('-o, --output <pattern>', 'Output file path or pattern. Use {dir}, {name}, {lang} for multiple files')
    .option('--stdout', 'Output to stdout instead of files')
    .option('--stats', 'Show detailed statistics')
    .option('--no-cache', 'Disable translation cache')
    .option('--cache-file <path>', 'Custom cache file path', getDefaultCacheFilePath())
    .option('--provider <type>', 'Translation provider: gemini, ollama, or openai', 'gemini')
    .option('--ollama-url <url>', 'Ollama API URL', 'http://localhost:11434')
    .option('--ollama-model <model>', 'Ollama model name', 'deepseek-r1:latest')
    .option('--list-providers', 'List available translation providers')
    .option('--verbose', 'Enable verbose output for debugging')
    .option('--detect-source', 'Auto-detect source language instead of assuming English')
    .option('--dry-run', 'Preview what would be translated without making API calls')
    .option('--preserve-formats', 'Preserve URLs, emails, numbers, dates, and other formats')
    .option('--metadata', 'Add translation metadata to output files (may break some i18n parsers)')
    .option('--sort-keys', 'Sort output JSON keys alphabetically')
    .option('--check-keys', 'Verify all source keys exist in output (exit with error if keys are missing)')
    .option('--gemini-model <model>', 'Gemini model to use', 'gemini-2.0-flash-lite')
    .option('--openai-model <model>', 'OpenAI model to use', 'gpt-4o-mini')
    .parse(process.argv);

  const inputFiles = program.args;
  const {
    lang, output, stdout, stats: showStats, cache, cacheFile,
    provider, ollamaUrl, ollamaModel, geminiModel, openaiModel,
    detectSource, dryRun, verbose, preserveFormats: shouldPreserveFormats,
    metadata, sortKeys, checkKeys
  } = program.opts();

  if (!output && !stdout) {
    program.error('Error: Specify either -o <pattern> or --stdout');
  }
  if (output && stdout) {
    program.error('Error: Cannot use both -o and --stdout');
  }

  // Enable verbose mode if requested
  if (verbose) {
    process.env.OLLAMA_VERBOSE = 'true';
  }

  // Parse target languages
  const targetLanguages = lang.split(',').map((l: string) => l.trim()).filter((l: string) => l);
  if (targetLanguages.length === 0) {
    program.error('Error: No valid target languages specified');
  }

  // Create translator
  let translator: TranslationProvider;
  try {
    translator = await TranslatorFactory.create({
      type: provider as TranslatorType,
      ollamaBaseUrl: ollamaUrl,
      ollamaModel: ollamaModel,
      geminiModel: geminiModel,
      openaiModel: openaiModel,
    });
    console.log(`Using translation provider: ${translator.name}\n`);
  } catch (error) {
    console.error(`Error initializing translator: ${error instanceof Error ? error.message : error}`);
    process.exit(1);
  }

  // Determine if we're processing single or multiple files
  const isSingleFile = inputFiles.length === 1 &&
    !inputFiles[0].includes('*') &&
    !inputFiles[0].includes('?') &&
    !inputFiles[0].includes('[');
  try {
    // Process each target language
    for (const targetLang of targetLanguages) {
      if (targetLanguages.length > 1) {
        console.log(`\n=== Processing language: ${targetLang} ===\n`);
      }
      if (isSingleFile) {
        await processSingleFile(inputFiles[0], targetLang, output, stdout, showStats, cache, cacheFile, translator, detectSource, dryRun, shouldPreserveFormats, metadata, sortKeys, checkKeys);
      } else {
        await processMultipleFiles(inputFiles, targetLang, output, stdout, showStats, cache, cacheFile, translator, detectSource, dryRun, shouldPreserveFormats, metadata, sortKeys, checkKeys);
      }
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error(`\nAn unexpected error occurred: ${errorMessage}`);
    process.exit(1);
  }
}

main().catch(error => {
  const errorMessage = error instanceof Error ? error.message : String(error);
  console.error(`\nAn unexpected error occurred: ${errorMessage}`);
  process.exit(1);
});
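For reference, here are typical invocations implied by the options defined above (assuming the package exposes a `translator-ai` binary, as the project name suggests; file names are placeholders):

translator-ai strings.json -l fr -o strings.fr.json
translator-ai en.json -l fr,de,es -o "locales/{lang}.json"
translator-ai "locales/en/**/*.json" -l de -o "{dir}/{name}.{lang}.json" --stats
translator-ai strings.json -l ja --stdout --provider ollama --dry-run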

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/DatanoiseTV/translator-ai'
