analyze-fuzzy-logs.js•8.5 kB
#!/usr/bin/env node
import { fuzzySearchLogger } from '../dist/utils/fuzzySearchLogger.js';
// Simple argument parsing

/**
 * Parse the analyzer's command-line options.
 *
 * Recognized forms: `-t <n>`, `--threshold <n>`, `--threshold=<n>`,
 * `-l <n>`, `--limit <n>` and `--limit=<n>`. Unknown arguments are ignored.
 * When an option is given more than once, the last occurrence wins.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{failureThreshold: number, limit: number}} Parsed options; a
 *   missing or unparseable value falls back to the default (0.7 / 100).
 */
function parseCliOptions(argv) {
  const options = { failureThreshold: 0.7, limit: 100 };

  // Only an unparseable value falls back to the default. A plain `|| 0.7`
  // would also discard an explicit threshold of 0, which is a valid setting.
  const toThreshold = (raw) => {
    const value = Number.parseFloat(raw);
    return Number.isNaN(value) ? 0.7 : value;
  };

  // A limit is a count of entries, so it must be a positive integer.
  const toLimit = (raw) => {
    const value = Number.parseInt(raw, 10);
    return Number.isInteger(value) && value > 0 ? value : 100;
  };

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === '--threshold' || arg === '-t') {
      // argv[i + 1] is undefined for a trailing flag, which parses to NaN
      // and therefore yields the default.
      options.failureThreshold = toThreshold(argv[i + 1]);
    } else if (arg === '--limit' || arg === '-l') {
      options.limit = toLimit(argv[i + 1]);
    } else if (arg.startsWith('--threshold=')) {
      options.failureThreshold = toThreshold(arg.slice('--threshold='.length));
    } else if (arg.startsWith('--limit=')) {
      options.limit = toLimit(arg.slice('--limit='.length));
    }
  }

  return options;
}

const args = process.argv.slice(2);

// Help takes precedence over every other option and ends the process.
if (args.includes('--help') || args.includes('-h')) {
  console.log(`Analyze fuzzy search logs for patterns and issues
Usage: node analyze-fuzzy-logs.js [options]
Options:
-t, --threshold <number> Failure threshold (0-1) (default: 0.7)
-l, --limit <number> Maximum number of logs to analyze (default: 100)
-h, --help Show this help message`);
  process.exit(0);
}

// Module-level settings read by the analysis below.
const { failureThreshold, limit } = parseCliOptions(args);
/**
 * Analyze recent fuzzy search log entries and print a summary report.
 *
 * Requests log lines via `fuzzySearchLogger.getRecentLogs(limit)`, classifies
 * each well-formed line as an exact match, fuzzy match or failure (relative to
 * the module-level `failureThreshold`), and prints counts, timing statistics,
 * the most frequent file extensions and character codes, a failure breakdown
 * and recommendations to stdout.
 *
 * Any error is reported on stderr and ends the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function analyzeLogs() {
  try {
    const logs = await fuzzySearchLogger.getRecentLogs(limit);
    const logPath = await fuzzySearchLogger.getLogPath();

    if (logs.length === 0) {
      console.log(`No fuzzy search logs found. Log file location: ${logPath}`);
      return;
    }

    const stats = collectLogStatistics(logs, failureThreshold);
    const { totalEntries, failures } = stats;

    // Lines were returned but none had the expected column count. Stop here
    // rather than report NaN percentages and +/-Infinity timings.
    if (totalEntries === 0) {
      console.log(`No parseable fuzzy search log entries found. Log file location: ${logPath}`);
      return;
    }

    const execution = summarizeNumbers(stats.executionTimes);
    const similarity = summarizeNumbers(stats.similarities);
    const byCountDescending = (a, b) => b[1] - a[1];
    const sortedExtensions = [...stats.fileExtensions.entries()].sort(byCountDescending);
    const sortedCharCodes = [...stats.charCodeCounts.entries()].sort(byCountDescending);

    console.log('\n=== Fuzzy Search Analysis ===\n');
    console.log(`Total Entries: ${totalEntries}`);
    console.log(`Exact Matches: ${formatShare(stats.exactMatches, totalEntries)}`);
    console.log(`Fuzzy Matches: ${formatShare(stats.fuzzyMatches, totalEntries)}`);
    console.log(`Failures: ${formatShare(failures, totalEntries)}`);
    console.log(`Below Threshold: ${formatShare(stats.belowThresholdCount, totalEntries)}`);

    console.log('\n--- Performance Metrics ---');
    console.log(`Average Execution Time: ${execution.mean.toFixed(2)}ms`);
    console.log(`Min Execution Time: ${execution.min.toFixed(2)}ms`);
    console.log(`Max Execution Time: ${execution.max.toFixed(2)}ms`);
    console.log(`Average Similarity: ${(similarity.mean * 100).toFixed(2)}%`);

    console.log('\n--- File Extensions (Top 5) ---');
    for (const [ext, count] of sortedExtensions.slice(0, 5)) {
      console.log(`${ext || 'none'}: ${count} times`);
    }

    console.log('\n--- Common Character Codes in Diffs (Top 10) ---');
    for (const [code, count] of sortedCharCodes.slice(0, 10)) {
      console.log(`${code} [${describeCharCode(code)}]: ${count} times`);
    }

    if (failures > 0) {
      printFailureAnalysis(stats.failureDetails);
    }

    console.log('\n--- Recommendations ---');
    if (failures > totalEntries * 0.1) {
      console.log([
        `⚠️ High failure rate (${((failures / totalEntries) * 100).toFixed(1)}%). Consider:`,
        '- Reviewing search text formatting (whitespace, line endings)',
        '- Checking for encoding issues',
        '- Using smaller, more specific search patterns',
      ].join('\n'));
    }
    if (execution.mean > 100) {
      console.log([
        `⚠️ Slow execution times (avg: ${execution.mean.toFixed(2)}ms). Consider:`,
        '- Reducing search text length',
        '- Breaking large edits into smaller chunks',
      ].join('\n'));
    }
    if (sortedCharCodes.length > 0) {
      const topCharCode = Number.parseInt(sortedCharCodes[0][0], 10);
      if (topCharCode === 13 || topCharCode === 10) {
        console.log([
          '💡 Most common character differences involve line endings (CR/LF).',
          'Consider normalizing line endings in your search text.',
        ].join('\n'));
      } else if (topCharCode === 32 || topCharCode === 9) {
        console.log([
          '💡 Most common character differences involve whitespace.',
          'Consider trimming whitespace or being more specific about spacing.',
        ].join('\n'));
      }
    }

    console.log(`\nLog file location: ${logPath}`);
    console.log(`Analysis completed for ${totalEntries} entries.`);
  } catch (error) {
    // A thrown value is not guaranteed to be an Error (or even an object).
    const reason = error && error.message ? error.message : error;
    console.error('Failed to analyze fuzzy search logs:', reason);
    process.exit(1);
  }
}

/**
 * Aggregate counters from raw tab-separated log lines.
 *
 * Column layout (zero-based): 0 timestamp, 1 searchText, 2 foundText,
 * 3 similarity, 4 executionTime, 5 exactMatchCount, 6 expectedReplacements,
 * 7 fuzzyThreshold, 8 belowThreshold, 9 diff, 10 searchLength, 11 foundLength,
 * 12 fileExtension, 13 characterCodes, 14 uniqueCharacterCount, 15 diffLength.
 * Lines with fewer than 16 columns are skipped.
 *
 * @param {string[]} logs - Raw log lines.
 * @param {number} threshold - Minimum similarity for a non-exact entry to
 *   count as a fuzzy match; anything lower (or non-numeric) is a failure.
 * @returns {object} Counters, numeric samples, frequency maps and failure details.
 */
function collectLogStatistics(logs, threshold) {
  const COLUMN = {
    similarity: 3,
    executionTime: 4,
    exactMatchCount: 5,
    belowThreshold: 8,
    fileExtension: 12,
    characterCodes: 13,
  };
  const MIN_COLUMNS = 16;

  const stats = {
    totalEntries: 0,
    exactMatches: 0,
    fuzzyMatches: 0,
    failures: 0,
    belowThresholdCount: 0,
    executionTimes: [],
    similarities: [],
    fileExtensions: new Map(),
    charCodeCounts: new Map(),
    failureDetails: [],
  };

  for (const line of logs) {
    const fields = line.split('\t');
    if (fields.length < MIN_COLUMNS) {
      continue;
    }
    stats.totalEntries++;

    const similarity = Number.parseFloat(fields[COLUMN.similarity]);
    const executionTime = Number.parseFloat(fields[COLUMN.executionTime]);
    const exactMatchCount = Number.parseInt(fields[COLUMN.exactMatchCount], 10);
    const fileExtension = fields[COLUMN.fileExtension];
    const characterCodes = fields[COLUMN.characterCodes];

    if (exactMatchCount > 0) {
      stats.exactMatches++;
    } else if (similarity >= threshold) {
      stats.fuzzyMatches++;
    } else {
      // Also reached when similarity is NaN, since NaN >= x is always false.
      stats.failures++;
      stats.failureDetails.push({ similarity, fileExtension });
    }

    if (fields[COLUMN.belowThreshold] === 'true') {
      stats.belowThresholdCount++;
    }

    // Keep only finite samples so one malformed field cannot turn the
    // averages into NaN.
    if (Number.isFinite(executionTime)) {
      stats.executionTimes.push(executionTime);
    }
    if (Number.isFinite(similarity)) {
      stats.similarities.push(similarity);
    }

    stats.fileExtensions.set(fileExtension, (stats.fileExtensions.get(fileExtension) || 0) + 1);

    if (characterCodes) {
      // Items are comma-separated; only the part before ':' is used as the
      // key, so each item adds one to its code's tally.
      for (const item of characterCodes.split(',')) {
        const key = item.split(':')[0];
        if (key === '') {
          continue;
        }
        stats.charCodeCounts.set(key, (stats.charCodeCounts.get(key) || 0) + 1);
      }
    }
  }

  return stats;
}

/**
 * Compute mean, minimum and maximum of a list of finite numbers.
 *
 * @param {number[]} values - Finite samples.
 * @returns {{mean: number, min: number, max: number}} All zeros when `values` is empty.
 */
function summarizeNumbers(values) {
  if (values.length === 0) {
    return { mean: 0, min: 0, max: 0 };
  }
  let sum = 0;
  let min = Infinity;
  let max = -Infinity;
  // A single pass avoids spreading a potentially large array into Math.min/max.
  for (const value of values) {
    sum += value;
    if (value < min) {
      min = value;
    }
    if (value > max) {
      max = value;
    }
  }
  return { mean: sum / values.length, min, max };
}

/**
 * Format a count together with its percentage of a total, e.g. `3 (25.00%)`.
 *
 * @param {number} count
 * @param {number} total - Must be non-zero.
 * @returns {string}
 */
function formatShare(count, total) {
  return `${count} (${((count / total) * 100).toFixed(2)}%)`;
}

/**
 * Render a decimal character code for display: printable ASCII as the
 * character itself, anything else as a `\x..` hex escape.
 *
 * @param {string} code - Decimal character code as text.
 * @returns {string} Display form, or `?` when `code` is not a number.
 */
function describeCharCode(code) {
  const charCode = Number.parseInt(code, 10);
  if (Number.isNaN(charCode)) {
    return '?';
  }
  return charCode < 32 || charCode > 126
    ? `\\x${charCode.toString(16).padStart(2, '0')}`
    : String.fromCharCode(charCode);
}

/**
 * Print the failure section: failures grouped by similarity range and the
 * five file extensions with the most failures.
 *
 * @param {{similarity: number, fileExtension: string}[]} failureDetails
 * @returns {void}
 */
function printFailureAnalysis(failureDetails) {
  console.log('\n--- Failure Analysis ---');
  console.log(`Total failures: ${failureDetails.length}`);

  const similarityRanges = {
    '0-20%': 0,
    '21-40%': 0,
    '41-60%': 0,
    '61-80%': 0,
    '81-99%': 0,
    // Failures whose similarity could not be parsed; without this bucket
    // they would fall through to the highest range.
    unknown: 0,
  };
  for (const { similarity } of failureDetails) {
    const percent = similarity * 100;
    if (!Number.isFinite(percent)) {
      similarityRanges.unknown++;
    } else if (percent <= 20) {
      similarityRanges['0-20%']++;
    } else if (percent <= 40) {
      similarityRanges['21-40%']++;
    } else if (percent <= 60) {
      similarityRanges['41-60%']++;
    } else if (percent <= 80) {
      similarityRanges['61-80%']++;
    } else {
      similarityRanges['81-99%']++;
    }
  }

  console.log('\nFailures by similarity range:');
  for (const [range, count] of Object.entries(similarityRanges)) {
    if (count > 0) {
      console.log(` ${range}: ${count} failures`);
    }
  }

  const failuresByExtension = new Map();
  for (const { fileExtension } of failureDetails) {
    const key = fileExtension || 'none';
    failuresByExtension.set(key, (failuresByExtension.get(key) || 0) + 1);
  }

  console.log('\nFailures by file extension:');
  const topExtensions = [...failuresByExtension.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 5);
  for (const [ext, count] of topExtensions) {
    console.log(` ${ext}: ${count} failures`);
  }
}
// Script entry point. analyzeLogs() reports its own errors and exits with
// code 1 on failure, so the returned promise is deliberately not awaited.
void analyzeLogs();