Skip to main content
Glama
TerminologyChecker.ts • 10.6 kB
/**
 * TerminologyChecker - Detects inconsistent term usage across manuscript
 *
 * Finds term variants (e.g., "email" vs "e-mail" vs "Email") and suggests standardization.
 */

import { WritingStorage } from "../storage/WritingStorage.js";
import { paginateResults } from "../utils/pagination.js";

/** One concrete spelling of a term, with where and how often it is used. */
export interface TermVariant {
  /** The spelling exactly as it appears in the text. */
  term: string;
  /** Number of occurrences across the scanned files. */
  count: number;
  /** Paths of every scanned file that contains this spelling. */
  files: string[];
  /** A capped sample of lines (1-based line numbers) that use this spelling. */
  examples: { file: string; line: number; context: string }[];
}

/** A set of spellings that normalize to the same term. */
export interface TermGroup {
  canonical: string;
  variants: TermVariant[];
  totalCount: number;
  inconsistency: "high" | "medium" | "low";
}

/** Result of a terminology check. */
export interface TerminologyReport {
  /** Groups after `limit` has been applied via `paginateResults`. */
  groups: TermGroup[];
  /** Number of inconsistent groups found before `limit` was applied. */
  totalIssues: number;
  /** Number of distinct files containing any variant of any reported group. */
  filesAffected: number;
}

/** A single usage example, as stored on `TermVariant.examples`. */
type UsageExample = TermVariant["examples"][number];

/** Running totals for one spelling while the corpus is being scanned. */
interface VariantStats {
  count: number;
  files: Set<string>;
  examples: UsageExample[];
}

/** Variant spelling -> stats, in first-seen order. */
type VariantIndex = Map<string, VariantStats>;

/** Very common words that are never reported as terminology issues. */
const STOP_WORDS: ReadonlySet<string> = new Set([
  "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of",
  "with", "by", "from", "as", "is", "was", "are", "were", "be", "been",
  "being", "have", "has", "had", "do", "does", "did", "will", "would",
  "could", "should", "may", "might", "can", "this", "that", "these", "those",
  "i", "you", "he", "she", "it", "we", "they",
]);

/** Sort weight per inconsistency level (higher sorts first). */
const SEVERITY_RANK: Record<TermGroup["inconsistency"], number> = {
  high: 3,
  medium: 2,
  low: 1,
};

export class TerminologyChecker {
  constructor(private storage: WritingStorage) {}

  /**
   * Find inconsistent terminology usage across files.
   *
   * @param options.scope Optional glob-like path filter; `*` is the only wildcard.
   * @param options.autoDetect When no `terms` are given, scan for variants automatically (default `true`).
   * @param options.terms Specific terms to check; takes precedence over `autoDetect`.
   * @param options.limit Passed through to `paginateResults` to cap the returned groups.
   * @param options.examplesPerVariant Maximum example lines kept per spelling (default 3).
   * @returns The report; empty when neither `terms` nor `autoDetect` is active.
   */
  async checkTerminology(options: {
    scope?: string;
    autoDetect?: boolean;
    terms?: string[];
    limit?: number;
    examplesPerVariant?: number;
  }): Promise<TerminologyReport> {
    const { scope, autoDetect = true, terms, limit, examplesPerVariant = 3 } = options;

    if (terms && terms.length > 0) {
      return this.checkSpecificTerms(terms, scope, limit, examplesPerVariant);
    }

    if (autoDetect) {
      return this.autoDetectVariants(scope, limit, examplesPerVariant);
    }

    return {
      groups: [],
      totalIssues: 0,
      filesAffected: 0,
    };
  }

  /**
   * Check specific terms for variants.
   *
   * A term is reported only when more than one spelling of it is found.
   * Groups are returned in the order the terms were given, with the
   * caller-supplied term used as the canonical form.
   */
  private async checkSpecificTerms(
    terms: string[],
    scope?: string,
    limit?: number,
    examplesPerVariant?: number
  ): Promise<TerminologyReport> {
    const groups: TermGroup[] = [];
    const filesAffected = new Set<string>();

    for (const term of terms) {
      const variants = await this.findVariantsForTerm(term, scope, examplesPerVariant);

      // A single spelling is consistent usage; only 2+ spellings are an issue.
      if (variants.length > 1) {
        groups.push(this.createTermGroup(term, variants));
        for (const variant of variants) {
          for (const file of variant.files) {
            filesAffected.add(file);
          }
        }
      }
    }

    const limitedGroups = paginateResults(groups, limit);

    return {
      groups: limitedGroups,
      totalIssues: groups.length,
      filesAffected: filesAffected.size,
    };
  }

  /**
   * Auto-detect term variants by analyzing content.
   *
   * Every word in scope is grouped by its normalized form; groups with more
   * than one spelling that pass `isSignificantTerm` are reported, most
   * inconsistent first. The canonical form is the most frequent spelling.
   */
  private async autoDetectVariants(
    scope?: string,
    limit?: number,
    examplesPerVariant = 3
  ): Promise<TerminologyReport> {
    const index = await this.collectVariants(scope, examplesPerVariant);

    const groups: TermGroup[] = [];
    const filesAffected = new Set<string>();

    for (const [normalized, variantIndex] of index) {
      if (variantIndex.size <= 1) {
        continue;
      }

      const counts = new Map<string, number>();
      for (const [spelling, stats] of variantIndex) {
        counts.set(spelling, stats.count);
      }
      if (!this.isSignificantTerm(normalized, counts)) {
        continue;
      }

      const variants = this.toTermVariants(variantIndex);
      // Count every file that uses a variant, not just the files that
      // happened to supply one of the (capped) examples.
      for (const variant of variants) {
        for (const file of variant.files) {
          filesAffected.add(file);
        }
      }

      groups.push(this.createTermGroup(this.selectCanonical(variants), variants));
    }

    // Sort by inconsistency severity, most severe first.
    groups.sort((a, b) => SEVERITY_RANK[b.inconsistency] - SEVERITY_RANK[a.inconsistency]);

    const limitedGroups = paginateResults(groups, limit);

    return {
      groups: limitedGroups,
      totalIssues: groups.length,
      filesAffected: filesAffected.size,
    };
  }

  /**
   * Find variants for a specific term.
   *
   * @returns Every spelling whose normalized form equals that of `term`, in
   *   first-seen order. Empty when `term` has no letters or digits, because
   *   such a term normalizes to "" and would otherwise match unrelated
   *   punctuation-only tokens.
   */
  private async findVariantsForTerm(
    term: string,
    scope?: string,
    examplesPerVariant = 3
  ): Promise<TermVariant[]> {
    const normalized = this.normalizeForGrouping(term);
    if (normalized === "") {
      return [];
    }

    const index = await this.collectVariants(scope, examplesPerVariant, normalized);
    const variantIndex = index.get(normalized);
    return variantIndex ? this.toTermVariants(variantIndex) : [];
  }

  /**
   * Scan the stored files once and collect, per normalized term, every
   * spelling with its occurrence count, containing files and example lines.
   *
   * Examples are matched on the exact token (case-sensitive, hyphens kept),
   * so the examples of "Email" never show lines that only contain "email"
   * or "e-mail". At most one example is recorded per spelling per line.
   *
   * @param scope Optional path filter; see `matchesScope`.
   * @param examplesPerVariant Maximum examples kept per spelling.
   * @param onlyNormalized When given, only tokens with this normalized form are tracked.
   * @returns Normalized term -> (spelling -> stats), both in first-seen order.
   */
  private async collectVariants(
    scope: string | undefined,
    examplesPerVariant: number,
    onlyNormalized?: string
  ): Promise<Map<string, VariantIndex>> {
    const files = await this.storage.getAllFiles();
    const index = new Map<string, VariantIndex>();

    for (const file of files) {
      if (scope && !this.matchesScope(file.file_path, scope)) {
        continue;
      }

      // Tokens never span a newline, so scanning line by line yields the same
      // words as scanning the whole content while also giving line numbers.
      const lines = file.content.split("\n");
      for (const [lineIndex, line] of lines.entries()) {
        const seenOnLine = new Set<string>();

        for (const word of this.extractWords(line)) {
          const normalized = this.normalizeForGrouping(word);
          if (onlyNormalized !== undefined && normalized !== onlyNormalized) {
            continue;
          }

          let variantIndex = index.get(normalized);
          if (!variantIndex) {
            variantIndex = new Map();
            index.set(normalized, variantIndex);
          }

          let stats = variantIndex.get(word);
          if (!stats) {
            stats = { count: 0, files: new Set(), examples: [] };
            variantIndex.set(word, stats);
          }

          stats.count += 1;
          stats.files.add(file.file_path);

          if (!seenOnLine.has(word)) {
            seenOnLine.add(word);
            if (stats.examples.length < examplesPerVariant) {
              stats.examples.push({
                file: file.file_path,
                line: lineIndex + 1,
                context: line.trim(),
              });
            }
          }
        }
      }
    }

    return index;
  }

  /** Convert accumulated stats into the public `TermVariant` shape. */
  private toTermVariants(variantIndex: VariantIndex): TermVariant[] {
    return Array.from(variantIndex, ([term, stats]) => ({
      term,
      count: stats.count,
      files: Array.from(stats.files),
      examples: stats.examples,
    }));
  }

  /**
   * Create term group from variants.
   *
   * `totalCount` is the sum of all variant counts.
   */
  private createTermGroup(canonical: string, variants: TermVariant[]): TermGroup {
    const totalCount = variants.reduce((sum, v) => sum + v.count, 0);

    return {
      canonical,
      variants,
      totalCount,
      inconsistency: this.calculateInconsistency(variants),
    };
  }

  /**
   * Select canonical form from variants (most common).
   *
   * On a tie the later variant in the array wins. `variants` must be
   * non-empty; `reduce` without an initial value throws on an empty array.
   */
  private selectCanonical(variants: TermVariant[]): string {
    return variants.reduce((a, b) => (a.count > b.count ? a : b)).term;
  }

  /**
   * Calculate inconsistency level based on variant distribution.
   *
   * "Dominance" is the share of all occurrences held by the most frequent
   * spelling: the more one spelling dominates, the less inconsistent the
   * usage is considered.
   */
  private calculateInconsistency(
    variants: TermVariant[]
  ): "high" | "medium" | "low" {
    if (variants.length <= 1) {
      return "low";
    }

    const totalCount = variants.reduce((sum, v) => sum + v.count, 0);
    const maxCount = Math.max(...variants.map((v) => v.count));
    const dominance = maxCount / totalCount;

    // Dominance above 80% = low inconsistency
    if (dominance > 0.8) {
      return "low";
    }

    // Dominance above 50%, up to and including 80% = medium inconsistency
    if (dominance > 0.5) {
      return "medium";
    }

    // Dominance of 50% or less (e.g. an even two-way split) = high inconsistency
    return "high";
  }

  /**
   * Check if term is significant (not common words).
   *
   * @param normalized Normalized form shared by all spellings in the group.
   * @param variantMap Spelling -> occurrence count.
   * @returns `false` for stop words, for terms used fewer than 3 times in
   *   total, and for terms shorter than 2 characters after normalization.
   */
  private isSignificantTerm(
    normalized: string,
    variantMap: Map<string, number>
  ): boolean {
    // Ignore very common words
    if (STOP_WORDS.has(normalized)) {
      return false;
    }

    // Require at least 2 characters
    if (normalized.length < 2) {
      return false;
    }

    // Require minimum usage
    const totalCount = Array.from(variantMap.values()).reduce((a, b) => a + b, 0);
    return totalCount >= 3;
  }

  /**
   * Normalize term for grouping (lowercase, no punctuation).
   *
   * Only ASCII letters and digits are kept, so accented or non-Latin
   * characters are dropped along with punctuation.
   */
  private normalizeForGrouping(term: string): string {
    return term.toLowerCase().replace(/[^a-z0-9]/g, "");
  }

  /**
   * Extract words from content.
   *
   * A word is a run of `\w` characters and hyphens that starts and ends at a
   * word boundary, so "e-mail" is one token. `\w` is ASCII-only here.
   */
  private extractWords(content: string): string[] {
    // Match words (including hyphenated words)
    const wordRegex = /\b[\w-]+\b/g;
    return content.match(wordRegex) ?? [];
  }

  /**
   * Escape regex special characters so `str` matches itself literally.
   */
  private escapeRegex(str: string): string {
    return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  }

  /**
   * Check if file path matches scope pattern.
   *
   * Simple glob matching: `*` matches any run of characters (including "/");
   * every other character is matched literally, so "chapters/*.md" requires
   * a real ".md" suffix and characters such as "(" or "+" cannot produce an
   * invalid or unintended regular expression. The whole path must match.
   */
  private matchesScope(filePath: string, scope: string): boolean {
    const pattern = scope
      .split("*")
      .map((literal) => this.escapeRegex(literal))
      .join(".*");
    return new RegExp(`^${pattern}$`).test(filePath);
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/xiaolai/claude-writers-aid-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.