/**
* Text analysis and batch processing tools
*/
import { countLetter, letterFrequency, CountLetterOutput, LetterFrequencyOutput } from './counting.js';
// ============================================================================
// compare_texts - Compare letter frequencies between two texts
// ============================================================================
/** Arguments accepted by `compareTexts`. */
export interface CompareTextsInput {
/** First text to analyze. */
text1: string;
/** Second text to analyze. */
text2: string;
/** Forwarded unchanged to `letterFrequency` for both texts. */
case_sensitive: boolean;
}
/** One row of the per-character table produced by `compareTexts`. */
export interface FrequencyComparison {
/** The character this row describes (a key from either text's frequency map). */
char: string;
/** Count reported for this character in text1; 0 when text1 has no entry for it. */
count_in_text1: number;
/** Count reported for this character in text2; 0 when text2 has no entry for it. */
count_in_text2: number;
/** `count_in_text1 - count_in_text2`; positive when the character is more frequent in text1. */
difference: number;
}
/** Result returned by `compareTexts`. */
export interface CompareTextsOutput {
/** The first input text, echoed back. */
text1: string;
/** The second input text, echoed back. */
text2: string;
/** Length of the whole `text1` input, not just the characters that were counted. */
text1_length: number;
/** Length of the whole `text2` input, not just the characters that were counted. */
text2_length: number;
/** The case-sensitivity flag from the input, echoed back. */
case_sensitive: boolean;
/** Characters present in both frequency maps, in default `sort()` order. */
common_characters: string[];
/** Characters present only in text1's frequency map, in default `sort()` order. */
unique_to_text1: string[];
/** Characters present only in text2's frequency map, in default `sort()` order. */
unique_to_text2: string[];
/** One row per character found in either text, in default `sort()` order. */
frequency_comparison: FrequencyComparison[];
/**
* Rounded percentage: number of common characters divided by the number of
* distinct characters across both texts; 0 when neither text yields any character.
*/
similarity_score: number;
/** One-sentence description containing both lengths, the common-character count and the similarity. */
summary: string;
}
/**
 * Compare which characters two texts use and how often each one occurs.
 *
 * Both texts are passed to `letterFrequency` with identical options
 * (`letters_only: true`, spaces and punctuation disabled). The keys of the two
 * resulting frequency maps are then compared as sets.
 *
 * Text lengths are reported in Unicode code points rather than UTF-16 code
 * units, so a character outside the Basic Multilingual Plane (e.g. an emoji)
 * counts once instead of twice.
 *
 * @param input - The two texts plus the case-sensitivity flag forwarded to `letterFrequency`.
 * @returns Shared and unique character lists, a per-character count table, a
 *   Jaccard similarity percentage over the distinct characters, and a summary sentence.
 */
export function compareTexts(input: CompareTextsInput): CompareTextsOutput {
  const { text1, text2, case_sensitive } = input;

  // Both texts must be analyzed with exactly the same options to be comparable.
  const frequencyOptions = {
    case_sensitive,
    include_spaces: false,
    include_punctuation: false,
    letters_only: true,
  };
  const freq1 = letterFrequency({ text: text1, ...frequencyOptions });
  const freq2 = letterFrequency({ text: text2, ...frequencyOptions });

  const chars1 = new Set(Object.keys(freq1.frequency));
  const chars2 = new Set(Object.keys(freq2.frequency));

  const common_characters = [...chars1].filter(c => chars2.has(c)).sort();
  const unique_to_text1 = [...chars1].filter(c => !chars2.has(c)).sort();
  const unique_to_text2 = [...chars2].filter(c => !chars1.has(c)).sort();

  const allChars = new Set([...chars1, ...chars2]);
  const frequency_comparison: FrequencyComparison[] = [...allChars].sort().map(char => {
    // A character missing from one map simply has a count of zero there.
    const count_in_text1 = freq1.frequency[char] ?? 0;
    const count_in_text2 = freq2.frequency[char] ?? 0;
    return {
      char,
      count_in_text1,
      count_in_text2,
      difference: count_in_text1 - count_in_text2,
    };
  });

  // Jaccard similarity over the sets of distinct characters, as a whole percentage.
  const intersection = common_characters.length;
  const union = allChars.size;
  const similarity_score = union > 0 ? Math.round((intersection / union) * 100) : 0;

  // Spreading a string iterates by code point, so surrogate pairs count as one character.
  const text1_length = [...text1].length;
  const text2_length = [...text2].length;

  const summary = `Text1 (${text1_length} chars) and Text2 (${text2_length} chars) share ${common_characters.length} common characters. Similarity: ${similarity_score}%.`;

  return {
    text1,
    text2,
    text1_length,
    text2_length,
    case_sensitive,
    common_characters,
    unique_to_text1,
    unique_to_text2,
    frequency_comparison,
    similarity_score,
    summary,
  };
}
// ============================================================================
// analyze_sentence - Word-by-word breakdown of character counts
// ============================================================================
/** Arguments accepted by `analyzeSentence`. */
export interface AnalyzeSentenceInput {
/** Text to analyze; it is split into words on whitespace. */
text: string;
/** The letter to look for; passed to `countLetter` once per word. */
letter: string;
/** Forwarded unchanged to `countLetter` for every word. */
case_sensitive: boolean;
}
/** Per-word entry produced by `analyzeSentence`. */
export interface WordAnalysis {
/** The word itself: a maximal run of non-whitespace characters from the text. */
word: string;
/** 1-based index of this word within the text. */
position: number;
/** Length of the word in Unicode code points. */
length: number;
/** `count` reported by `countLetter` for this word. */
letter_count: number;
/**
* `positions` reported by `countLetter` when called on this word alone, so they
* are relative to the word, not the full text. The indexing convention is
* whatever `countLetter` uses.
*/
letter_positions: number[];
}
/** Result returned by `analyzeSentence`. */
export interface AnalyzeSentenceOutput {
/** The input text, echoed back. */
text: string;
/** The letter that was searched for, echoed back. */
letter: string;
/** The case-sensitivity flag from the input, echoed back. */
case_sensitive: boolean;
/** Sum of `letter_count` over all words. */
total_count: number;
/** Number of words found in the text. */
word_count: number;
/** One entry per word, in order of appearance. */
words: WordAnalysis[];
/** One-sentence description containing the total count and the number of words. */
summary: string;
/** One line per word, joined with newlines. */
breakdown_table: string;
}
/**
 * Split a text into whitespace-separated words and count `letter` in each one.
 *
 * @param input - The text to split, the letter to look for, and the
 *   case-sensitivity flag handed to `countLetter` for every word.
 * @returns The per-word results, their combined total, a one-sentence summary
 *   and a newline-separated listing with one line per word.
 */
export function analyzeSentence(input: AnalyzeSentenceInput): AnalyzeSentenceOutput {
  const { text, letter, case_sensitive } = input;

  const words: WordAnalysis[] = [];
  const tableLines: string[] = [];
  let total_count = 0;

  // Each maximal run of non-whitespace characters is treated as one word.
  for (const token of text.match(/\S+/g) ?? []) {
    const counted = countLetter({ text: token, letter, case_sensitive });
    const entry: WordAnalysis = {
      word: token,
      position: words.length + 1, // 1-based index of the word
      length: [...token].length, // code points, not UTF-16 units
      letter_count: counted.count,
      letter_positions: counted.positions,
    };
    words.push(entry);
    total_count += entry.letter_count;

    // The positions suffix is only shown when the letter actually occurs.
    let line = `${entry.position}. "${entry.word}" (${entry.length} chars): ${entry.letter_count} '${letter}'`;
    if (entry.letter_positions.length > 0) {
      line += ` at positions [${entry.letter_positions.join(', ')}]`;
    }
    tableLines.push(line);
  }

  const breakdown_table = tableLines.join('\n');

  const occurrenceSuffix = total_count === 1 ? '' : 's';
  const wordSuffix = words.length === 1 ? '' : 's';
  const summary = `Found ${total_count} occurrence${occurrenceSuffix} of '${letter}' across ${words.length} word${wordSuffix}.`;

  return {
    text,
    letter,
    case_sensitive,
    total_count,
    word_count: words.length,
    words,
    summary,
    breakdown_table,
  };
}
// ============================================================================
// batch_count - Count a letter across multiple words at once
// ============================================================================
/** Arguments accepted by `batchCount`. */
export interface BatchCountInput {
/** Words to analyze; each one is passed to `countLetter` separately. */
words: string[];
/** The letter to look for in every word. */
letter: string;
/** Forwarded unchanged to `countLetter` for every word. */
case_sensitive: boolean;
}
/** Per-word entry produced by `batchCount`. */
export interface BatchWordResult {
/** The input word, echoed back. */
word: string;
/** `count` reported by `countLetter` for this word. */
count: number;
/** `positions` reported by `countLetter` for this word. */
positions: number[];
}
/** Result returned by `batchCount`. */
export interface BatchCountOutput {
/** The letter that was searched for, echoed back. */
letter: string;
/** The case-sensitivity flag from the input, echoed back. */
case_sensitive: boolean;
/** One entry per input word, in input order. */
results: BatchWordResult[];
/** Sum of `count` over all results. */
total_count: number;
/** Number of results whose `count` is greater than zero. */
words_with_letter: number;
/** Number of results whose `count` is exactly zero. */
words_without_letter: number;
/** Short description containing the word count, the total, and the with/without split. */
summary: string;
/** A copy of `results` sorted by `count`, highest first. */
sorted_by_count: BatchWordResult[];
}
/**
 * Count one letter in each word of a list and aggregate the results.
 *
 * @param input - The words to inspect, the letter to look for, and the
 *   case-sensitivity flag handed to `countLetter` for every word.
 * @returns Per-word results in input order, the same results ordered by
 *   descending count, aggregate totals, and a summary.
 */
export function batchCount(input: BatchCountInput): BatchCountOutput {
  const { words, letter, case_sensitive } = input;

  const results: BatchWordResult[] = [];
  let total_count = 0;
  let words_with_letter = 0;
  let words_without_letter = 0;

  for (const word of words) {
    const outcome = countLetter({ text: word, letter, case_sensitive });
    results.push({ word, count: outcome.count, positions: outcome.positions });

    total_count += outcome.count;
    if (outcome.count > 0) {
      words_with_letter += 1;
    }
    if (outcome.count === 0) {
      words_without_letter += 1;
    }
  }

  // Sort a copy so `results` keeps the caller's original word order.
  const sorted_by_count = [...results];
  sorted_by_count.sort((left, right) => right.count - left.count);

  const pluralSuffix = (n: number): string => (n === 1 ? '' : 's');
  const summary =
    `Analyzed ${words.length} word${pluralSuffix(words.length)}: ` +
    `found ${total_count} total '${letter}'${pluralSuffix(total_count)}. ` +
    `${words_with_letter} word${pluralSuffix(words_with_letter)} contain the letter, ` +
    `${words_without_letter} do not.`;

  return {
    letter,
    case_sensitive,
    results,
    total_count,
    words_with_letter,
    words_without_letter,
    summary,
    sorted_by_count,
  };
}