audioAnalyzer.ts
import type { ProcessedTranscript, TranscriptSegment } from './videoAnalysis'

export interface VerbalIssue {
  timestamp: number
  text: string
  type: 'bug' | 'confusion' | 'expectation' | 'frustration' | 'question' | 'observation'
  severity: 'critical' | 'high' | 'medium' | 'low'
  confidence: number
  keywords: string[]
  context?: string
}

export interface SpeakerIntent {
  timestamp: number
  intent: 'reporting' | 'explaining' | 'demonstrating' | 'questioning' | 'troubleshooting'
  confidence: number
  segment: string
}

export interface AudioAnalysisResult {
  verbalIssues: VerbalIssue[]
  speakerIntents: SpeakerIntent[]
  keyPhrases: KeyPhrase[]
  emotionalTone: EmotionalTone[]
  technicalTerms: TechnicalTerm[]
  problemStatements: ProblemStatement[]
}

export interface KeyPhrase {
  phrase: string
  frequency: number
  timestamps: number[]
  importance: number
}

export interface EmotionalTone {
  timestamp: number
  tone: 'frustrated' | 'confused' | 'surprised' | 'neutral' | 'satisfied'
  intensity: number
  indicators: string[]
}

export interface TechnicalTerm {
  term: string
  category: 'component' | 'error' | 'feature' | 'data' | 'action' | 'state'
  occurrences: Array<{ timestamp: number; context: string }>
}

export interface ProblemStatement {
  timestamp: number
  statement: string
  expectedBehavior?: string
  actualBehavior?: string
  userImpact?: string
}

export class AudioAnalyzer {
  // Common issue-related phrases and their weights
  private readonly issueIndicators = {
    bug: {
      phrases: [
        'bug', 'broken', 'not working', "doesn't work", 'failing', 'failed',
        'error', 'crash', 'frozen', 'stuck', 'hangs', 'unresponsive',
      ],
      weight: 0.9,
    },
    confusion: {
      phrases: [
        'confused', "don't understand", 'not sure', 'unclear', 'what is',
        'where is', 'how do i', "can't find", 'missing', 'lost',
      ],
      weight: 0.7,
    },
    expectation: {
      phrases: [
        'should', 'supposed to', 'expected', 'thought it would', 'normally',
        'usually', 'used to', 'different from', 'changed',
      ],
      weight: 0.6,
    },
    frustration: {
      phrases: [
        'annoying', 'frustrating', 'ugh', 'argh', 'seriously', 'come on',
        'why', 'still', 'again', 'keeps', 'always', 'never',
      ],
      weight: 0.8,
    },
    question: {
      phrases: [
        'why is', 'how come', "what's", "where's", 'when did', 'who',
        'is this', 'are we', 'could this', 'should this',
      ],
      weight: 0.5,
    },
    observation: {
      phrases: [
        'notice', 'see', 'look', 'shows', 'displays', 'appears', 'seems',
        'looks like', 'behaves', 'happens',
      ],
      weight: 0.4,
    },
  }

  // Technical indicators for categorizing terms
  private readonly technicalIndicators = {
    component: [
      'component', 'page', 'modal', 'button', 'form', 'input', 'dropdown',
      'menu', 'panel', 'widget',
    ],
    error: ['error', 'exception', 'fail', 'timeout', 'undefined', 'null', 'invalid', 'missing'],
    feature: ['feature', 'functionality', 'capability', 'option', 'setting', 'preference'],
    data: ['data', 'value', 'field', 'record', 'entry', 'item', 'list', 'table'],
    action: ['click', 'type', 'submit', 'save', 'delete', 'update', 'create', 'load'],
    state: ['state', 'status', 'loading', 'saving', 'enabled', 'disabled', 'active', 'selected'],
  }

  // Emotional tone indicators
  private readonly emotionalIndicators = {
    frustrated: {
      words: ['frustrating', 'annoying', 'irritating', 'ugh', 'argh', 'seriously'],
      patterns: [/!+/, /\?{2,}/, /\.{3,}/],
      repetition: ['still', 'again', 'keeps', 'always'],
    },
    confused: {
      words: ['confused', 'unclear', 'weird', 'strange', 'odd', 'hmm'],
      patterns: [/\?+/, /what\s+the/, /i\s+don't\s+understand/i],
      repetition: ['what', 'where', 'how', 'why'],
    },
    surprised: {
      words: ['wow', 'oh', 'whoa', 'unexpected', 'suddenly', 'surprised'],
      patterns: [/oh\s+my/, /what\s+the/, /didn't\s+expect/i],
      repetition: [],
    },
  }

  async analyzeTranscript(transcript: ProcessedTranscript): Promise<AudioAnalysisResult> {
    const verbalIssues = this.extractVerbalIssues(transcript)
    const speakerIntents = this.detectSpeakerIntents(transcript)
    const keyPhrases = this.extractKeyPhrases(transcript)
    const emotionalTone = this.analyzeEmotionalTone(transcript)
    const technicalTerms = this.extractTechnicalTerms(transcript)
    const problemStatements = this.extractProblemStatements(transcript)

    return {
      verbalIssues,
      speakerIntents,
      keyPhrases,
      emotionalTone,
      technicalTerms,
      problemStatements,
    }
  }

  private extractVerbalIssues(transcript: ProcessedTranscript): VerbalIssue[] {
    const issues: VerbalIssue[] = []

    for (const segment of transcript.segments) {
      const text = segment.text.toLowerCase()
      const words = text.split(/\s+/)

      // Check each issue type
      for (const [type, config] of Object.entries(this.issueIndicators)) {
        const matches = this.findIssueMatches(text, words, config.phrases)

        if (matches.length > 0) {
          const severity = this.calculateSeverity(text, matches, type as VerbalIssue['type'])
          const confidence = Math.min(matches.length * config.weight * 0.3, 1)

          issues.push({
            timestamp: segment.startTime,
            text: segment.text,
            type: type as VerbalIssue['type'],
            severity,
            confidence,
            keywords: matches,
            context: this.extractContext(transcript, segment),
          })
        }
      }
    }

    // Merge nearby issues and boost confidence
    return this.mergeNearbyIssues(issues)
  }

  private findIssueMatches(text: string, words: string[], phrases: string[]): string[] {
    const matches: string[] = []

    for (const phrase of phrases) {
      if (phrase.includes(' ')) {
        // Multi-word phrase
        if (text.includes(phrase)) {
          matches.push(phrase)
        }
      } else {
        // Single word
        if (words.includes(phrase)) {
          matches.push(phrase)
        }
      }
    }

    return [...new Set(matches)]
  }

  private calculateSeverity(
    text: string,
    matches: string[],
    type: VerbalIssue['type']
  ): VerbalIssue['severity'] {
    let score = matches.length * 0.2

    // Boost for emphasis
    if (text.includes('!')) score += 0.2
    if (text.includes('really') || text.includes('very')) score += 0.15
    if (text.includes('completely') || text.includes('totally')) score += 0.2
    if (text.match(/\b(critical|severe|major|blocking)\b/)) score += 0.3

    // Boost for certain issue types
    if (type === 'bug' || type === 'frustration') score += 0.2

    // Check for user impact mentions
    if (text.match(/\b(can't|cannot|unable|impossible|prevents)\b/)) score += 0.25
    if (text.match(/\b(users?|customers?|everyone|team)\b/)) score += 0.2

    if (score >= 0.8) return 'critical'
    if (score >= 0.6) return 'high'
    if (score >= 0.4) return 'medium'
    return 'low'
  }

  private detectSpeakerIntents(transcript: ProcessedTranscript): SpeakerIntent[] {
    const intents: SpeakerIntent[] = []

    const intentPatterns = {
      reporting: [
        /i\s+found\s+(?:a\s+)?(?:bug|issue|problem)/i,
        /there's\s+(?:a\s+)?(?:bug|issue|problem)/i,
        /(?:this|it)\s+(?:is|seems)\s+broken/i,
        /reporting\s+(?:a|an|this)/i,
      ],
      explaining: [
        /what\s+(?:happens|happened)\s+(?:is|was)/i,
        /let\s+me\s+(?:show|explain)/i,
        /here's\s+what\s+(?:i|we)\s+(?:see|saw)/i,
        /the\s+(?:issue|problem)\s+is/i,
      ],
      demonstrating: [
        /(?:i'm|i\s+am)\s+(?:going\s+to|gonna)\s+(?:show|click|type)/i,
        /watch\s+what\s+happens/i,
        /(?:see|look)\s+(?:here|at\s+this)/i,
        /notice\s+(?:how|what)/i,
      ],
      questioning: [
        /(?:why|how\s+come)\s+(?:is|does|doesn't)/i,
        /is\s+(?:this|it)\s+supposed\s+to/i,
        /should\s+(?:this|it)\s+be/i,
        /what's\s+going\s+on/i,
      ],
      troubleshooting: [
        /(?:i|we)\s+(?:tried|attempted)/i,
        /(?:doesn't|won't)\s+work\s+(?:when|if)/i,
        /(?:only|always)\s+happens\s+(?:when|if)/i,
        /(?:refresh|reload|restart)(?:ing|ed)?/i,
      ],
    }

    for (const segment of transcript.segments) {
      for (const [intent, patterns] of Object.entries(intentPatterns)) {
        for (const pattern of patterns) {
          if (pattern.test(segment.text)) {
            intents.push({
              timestamp: segment.startTime,
              intent: intent as SpeakerIntent['intent'],
              confidence: 0.8,
              segment: segment.text,
            })
            break
          }
        }
      }
    }

    return intents
  }

  private extractKeyPhrases(transcript: ProcessedTranscript): KeyPhrase[] {
    const phraseMap = new Map<string, { count: number; timestamps: number[] }>()

    // Extract 2-4 word phrases
    for (const segment of transcript.segments) {
      const words = segment.text
        .toLowerCase()
        .replace(/[^\w\s]/g, '')
        .split(/\s+/)
        .filter((w) => w.length > 2)

      // Generate n-grams
      for (let n = 2; n <= 4; n++) {
        for (let i = 0; i <= words.length - n; i++) {
          const phrase = words.slice(i, i + n).join(' ')

          // Skip common phrases
          if (this.isCommonPhrase(phrase)) continue

          const existing = phraseMap.get(phrase) || { count: 0, timestamps: [] }
          existing.count++
          existing.timestamps.push(segment.startTime)
          phraseMap.set(phrase, existing)
        }
      }
    }

    // Convert to KeyPhrase array and calculate importance
    const keyPhrases: KeyPhrase[] = []
    for (const [phrase, data] of phraseMap.entries()) {
      if (data.count >= 2) {
        // Only include repeated phrases
        keyPhrases.push({
          phrase,
          frequency: data.count,
          timestamps: data.timestamps,
          importance: this.calculatePhraseImportance(phrase, data.count),
        })
      }
    }

    return keyPhrases.sort((a, b) => b.importance - a.importance).slice(0, 20)
  }

  private isCommonPhrase(phrase: string): boolean {
    const common = [
      'going to', 'want to', 'need to', 'have to', 'used to', 'able to',
      'trying to', 'supposed to', 'the the', 'and the', 'in the', 'on the',
      'at the', 'to the', 'of the',
    ]
    return common.includes(phrase)
  }

  private calculatePhraseImportance(phrase: string, frequency: number): number {
    let importance = frequency * 0.2

    // Boost technical phrases
    if (phrase.match(/\b(error|bug|issue|problem|fail)\b/)) importance += 0.3
    if (phrase.match(/\b(component|function|api|endpoint)\b/)) importance += 0.2
    if (phrase.match(/\b(user|customer|client)\b/)) importance += 0.2

    // Boost action phrases
    if (phrase.match(/\b(click|type|submit|save|load)\b/)) importance += 0.15

    return Math.min(importance, 1)
  }

  private analyzeEmotionalTone(transcript: ProcessedTranscript): EmotionalTone[] {
    const tones: EmotionalTone[] = []

    for (const segment of transcript.segments) {
      const text = segment.text.toLowerCase()
      const detectedTones = this.detectEmotionalTones(text)

      if (detectedTones.length > 0) {
        // Use the strongest detected tone
        const strongestTone = detectedTones.reduce((a, b) => (a.intensity > b.intensity ? a : b))

        tones.push({
          timestamp: segment.startTime,
          tone: strongestTone.tone,
          intensity: strongestTone.intensity,
          indicators: strongestTone.indicators,
        })
      }
    }

    return tones
  }

  private detectEmotionalTones(
    text: string
  ): Array<{ tone: EmotionalTone['tone']; intensity: number; indicators: string[] }> {
    const detected: Array<{
      tone: EmotionalTone['tone']
      intensity: number
      indicators: string[]
    }> = []

    for (const [tone, config] of Object.entries(this.emotionalIndicators)) {
      const indicators: string[] = []
      let intensity = 0

      // Check words
      for (const word of config.words) {
        if (text.includes(word)) {
          indicators.push(word)
          intensity += 0.3
        }
      }

      // Check patterns
      for (const pattern of config.patterns) {
        if (pattern.test(text)) {
          indicators.push('pattern: ' + pattern.source)
          intensity += 0.2
        }
      }

      // Check repetition
      for (const word of config.repetition) {
        const count = (text.match(new RegExp(`\\b${word}\\b`, 'g')) || []).length
        if (count >= 2) {
          indicators.push(`repeated: ${word} (${count}x)`)
          intensity += 0.1 * count
        }
      }

      if (indicators.length > 0) {
        detected.push({
          tone: tone as EmotionalTone['tone'],
          intensity: Math.min(intensity, 1),
          indicators,
        })
      }
    }

    // Add neutral if no strong emotions detected
    if (detected.length === 0 || detected.every((d) => d.intensity < 0.3)) {
      detected.push({
        tone: 'neutral',
        intensity: 0.5,
        indicators: ['no strong emotional indicators'],
      })
    }

    return detected
  }

  private extractTechnicalTerms(transcript: ProcessedTranscript): TechnicalTerm[] {
    const termMap = new Map<string, TechnicalTerm>()

    for (const segment of transcript.segments) {
      const words = segment.text.split(/\s+/)

      for (const word of words) {
        const cleanWord = word.toLowerCase().replace(/[^\w]/g, '')
        if (cleanWord.length < 3) continue

        // Check each category
        for (const [category, indicators] of Object.entries(this.technicalIndicators)) {
          if (indicators.some((ind) => cleanWord.includes(ind))) {
            const key = `${cleanWord}:${category}`
            const existing = termMap.get(key)

            if (existing) {
              existing.occurrences.push({
                timestamp: segment.startTime,
                context: this.extractWordContext(segment.text, word),
              })
            } else {
              termMap.set(key, {
                term: word,
                category: category as TechnicalTerm['category'],
                occurrences: [
                  {
                    timestamp: segment.startTime,
                    context: this.extractWordContext(segment.text, word),
                  },
                ],
              })
            }
            break
          }
        }
      }
    }

    return Array.from(termMap.values())
  }

  private extractWordContext(text: string, word: string): string {
    const index = text.toLowerCase().indexOf(word.toLowerCase())
    if (index === -1) return text

    const start = Math.max(0, index - 30)
    const end = Math.min(text.length, index + word.length + 30)

    let context = text.substring(start, end)
    if (start > 0) context = '...' + context
    if (end < text.length) context = context + '...'

    return context
  }

  private extractProblemStatements(transcript: ProcessedTranscript): ProblemStatement[] {
    const statements: ProblemStatement[] = []

    // Patterns for problem statements
    const problemPatterns = [
      // Expected vs Actual
      /(?:expected|thought|should)\s+(?:to\s+)?(.+?)\s+but\s+(?:got|received|see|shows?)\s+(.+)/i,
      /(?:supposed\s+to)\s+(.+?)\s+(?:but|instead)\s+(?:it\s+)?(.+)/i,
      // When/Then patterns
      /when\s+(?:i|we|you)\s+(.+?)[,.]?\s+(?:then\s+)?(?:it|the)\s+(.+)/i,
      /(?:if|after)\s+(?:i|we|you)\s+(.+?)[,.]?\s+(?:it|the)\s+(.+)/i,
      // Direct problem statements
      /(?:the\s+)?(?:problem|issue)\s+is\s+(?:that\s+)?(.+)/i,
      /(?:it's|its)\s+not\s+(.+?)\s+(?:properly|correctly|right)/i,
      /(?:can't|cannot|unable\s+to)\s+(.+)/i,
    ]

    for (const segment of transcript.segments) {
      for (const pattern of problemPatterns) {
        const match = segment.text.match(pattern)
        if (match) {
          const statement: ProblemStatement = {
            timestamp: segment.startTime,
            statement: segment.text,
          }

          // Try to extract expected vs actual
          if (match[1] && match[2]) {
            statement.expectedBehavior = match[1].trim()
            statement.actualBehavior = match[2].trim()
          }

          // Look for user impact
          const impactMatch = segment.text.match(
            /(?:this\s+)?(?:prevents?|blocks?|stops?)\s+(?:me|us|users?)\s+(?:from\s+)?(.+)/i
          )
          if (impactMatch) {
            statement.userImpact = impactMatch[1].trim()
          }

          statements.push(statement)
          break
        }
      }
    }

    return statements
  }

  private extractContext(
    transcript: ProcessedTranscript,
    currentSegment: TranscriptSegment
  ): string {
    const index = transcript.segments.indexOf(currentSegment)
    const contextSegments: string[] = []

    // Get previous segment
    if (index > 0) {
      contextSegments.push(transcript.segments[index - 1].text)
    }

    // Current segment
    contextSegments.push(currentSegment.text)

    // Get next segment
    if (index < transcript.segments.length - 1) {
      contextSegments.push(transcript.segments[index + 1].text)
    }

    return contextSegments.join(' ')
  }

  private mergeNearbyIssues(issues: VerbalIssue[]): VerbalIssue[] {
    if (issues.length <= 1) return issues

    const merged: VerbalIssue[] = []
    const used = new Set<number>()

    for (let i = 0; i < issues.length; i++) {
      if (used.has(i)) continue

      const current = issues[i]
      const nearby: VerbalIssue[] = [current]

      // Find issues within 10 seconds
      for (let j = i + 1; j < issues.length; j++) {
        if (used.has(j)) continue

        const other = issues[j]
        if (Math.abs(other.timestamp - current.timestamp) <= 10 && other.type === current.type) {
          nearby.push(other)
          used.add(j)
        }
      }

      if (nearby.length > 1) {
        // Merge the issues
        const allKeywords = [...new Set(nearby.flatMap((n) => n.keywords))]
        const maxSeverity = nearby.reduce(
          (max, n) => (this.compareSeverity(max, n.severity) > 0 ? max : n.severity),
          current.severity
        )
        const avgConfidence = nearby.reduce((sum, n) => sum + n.confidence, 0) / nearby.length

        merged.push({
          ...current,
          keywords: allKeywords,
          severity: maxSeverity,
          confidence: Math.min(avgConfidence * 1.2, 1), // Boost confidence for repeated issues
          context: nearby.map((n) => n.text).join(' '),
        })
      } else {
        merged.push(current)
      }
    }

    return merged
  }

  private compareSeverity(a: VerbalIssue['severity'], b: VerbalIssue['severity']): number {
    const order = { critical: 4, high: 3, medium: 2, low: 1 }
    return order[a] - order[b]
  }

  // Correlate verbal issues with visual events
  correlateWithVisualEvents(
    audioAnalysis: AudioAnalysisResult,
    visualTimestamps: Array<{ timestamp: number; type: string; content: string }>
  ): Array<{
    verbal: VerbalIssue
    visual: (typeof visualTimestamps)[0] | null
    correlation: number
  }> {
    const correlations: Array<{
      verbal: VerbalIssue
      visual: (typeof visualTimestamps)[0] | null
      correlation: number
    }> = []

    for (const issue of audioAnalysis.verbalIssues) {
      // Find visual events within 5 seconds
      const nearbyVisual = visualTimestamps.filter(
        (v) => Math.abs(v.timestamp - issue.timestamp) <= 5
      )

      if (nearbyVisual.length > 0) {
        // Find best match
        let bestMatch = nearbyVisual[0]
        let bestScore = 0

        for (const visual of nearbyVisual) {
          let score = 1 / (1 + Math.abs(visual.timestamp - issue.timestamp))

          // Boost score for matching types
          if (issue.type === 'bug' && visual.type === 'error') score *= 2
          if (issue.type === 'confusion' && visual.type === 'navigation') score *= 1.5
          if (issue.keywords.some((k) => visual.content.toLowerCase().includes(k))) score *= 1.5

          if (score > bestScore) {
            bestScore = score
            bestMatch = visual
          }
        }

        correlations.push({
          verbal: issue,
          visual: bestMatch,
          correlation: bestScore,
        })
      } else {
        correlations.push({
          verbal: issue,
          visual: null,
          correlation: 0,
        })
      }
    }

    return correlations
  }
}
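
For orientation, here is a minimal usage sketch, not part of the file above. The exact shapes of ProcessedTranscript and TranscriptSegment live in './videoAnalysis', which is not shown here; the literal below only fills in the fields this analyzer actually reads (segments, startTime, text), so the cast is purely illustrative, and the visual events are hypothetical.

import { AudioAnalyzer } from './audioAnalyzer'
import type { ProcessedTranscript } from './videoAnalysis'

async function demo() {
  const analyzer = new AudioAnalyzer()

  // Assumed minimal transcript shape: only the fields AudioAnalyzer reads.
  const transcript = {
    segments: [
      { startTime: 12, text: 'The save button is broken, it keeps failing!' },
      { startTime: 15, text: 'I expected the form to submit but got an error instead.' },
    ],
  } as unknown as ProcessedTranscript

  const result = await analyzer.analyzeTranscript(transcript)
  console.log(result.verbalIssues) // bug/frustration issues with severity, confidence, keywords
  console.log(result.problemStatements) // expected vs. actual behavior parsed from the second segment

  // Hypothetical visual events (e.g. from screen analysis) to correlate against.
  // A 'bug' issue near an 'error' event gets its correlation score boosted.
  const correlations = analyzer.correlateWithVisualEvents(result, [
    { timestamp: 14, type: 'error', content: 'Save failed: 500' },
  ])
  console.log(correlations[0]?.correlation)
}

demo()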
