// audioAnalyzer.ts
import type { ProcessedTranscript, TranscriptSegment } from './videoAnalysis'
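// Note: this module only relies on ProcessedTranscript exposing a `segments`
// array whose items carry `startTime` (seconds) and `text`. That shape is an
// assumption inferred from usage below; see ./videoAnalysis for the source of truth.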
export interface VerbalIssue {
timestamp: number
text: string
type: 'bug' | 'confusion' | 'expectation' | 'frustration' | 'question' | 'observation'
severity: 'critical' | 'high' | 'medium' | 'low'
confidence: number
keywords: string[]
context?: string
}
export interface SpeakerIntent {
timestamp: number
intent: 'reporting' | 'explaining' | 'demonstrating' | 'questioning' | 'troubleshooting'
confidence: number
segment: string
}
export interface AudioAnalysisResult {
verbalIssues: VerbalIssue[]
speakerIntents: SpeakerIntent[]
keyPhrases: KeyPhrase[]
emotionalTone: EmotionalTone[]
technicalTerms: TechnicalTerm[]
problemStatements: ProblemStatement[]
}
export interface KeyPhrase {
phrase: string
frequency: number
timestamps: number[]
importance: number
}
export interface EmotionalTone {
timestamp: number
tone: 'frustrated' | 'confused' | 'surprised' | 'neutral' | 'satisfied'
intensity: number
indicators: string[]
}
export interface TechnicalTerm {
term: string
category: 'component' | 'error' | 'feature' | 'data' | 'action' | 'state'
occurrences: Array<{ timestamp: number; context: string }>
}
export interface ProblemStatement {
timestamp: number
statement: string
expectedBehavior?: string
actualBehavior?: string
userImpact?: string
}
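/**
 * Heuristic, lexicon-based analyzer for screen-recording transcripts.
 * Every detection pass below is keyword/regex matching over segment text;
 * no ML model is involved, so scores are rough relevance signals rather
 * than calibrated probabilities.
 */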
export class AudioAnalyzer {
// Common issue-related phrases and their weights
private readonly issueIndicators = {
bug: {
phrases: [
'bug',
'broken',
'not working',
"doesn't work",
'failing',
'failed',
'error',
'crash',
'frozen',
'stuck',
'hangs',
'unresponsive',
],
weight: 0.9,
},
confusion: {
phrases: [
'confused',
"don't understand",
'not sure',
'unclear',
'what is',
'where is',
'how do i',
"can't find",
'missing',
'lost',
],
weight: 0.7,
},
expectation: {
phrases: [
'should',
'supposed to',
'expected',
'thought it would',
'normally',
'usually',
'used to',
'different from',
'changed',
],
weight: 0.6,
},
frustration: {
phrases: [
'annoying',
'frustrating',
'ugh',
'argh',
'seriously',
'come on',
'why',
'still',
'again',
'keeps',
'always',
'never',
],
weight: 0.8,
},
question: {
phrases: [
'why is',
'how come',
"what's",
"where's",
'when did',
'who',
'is this',
'are we',
'could this',
'should this',
],
weight: 0.5,
},
observation: {
phrases: [
'notice',
'see',
'look',
'shows',
'displays',
'appears',
'seems',
'looks like',
'behaves',
'happens',
],
weight: 0.4,
},
}
// Technical indicators for categorizing terms
private readonly technicalIndicators = {
component: [
'component',
'page',
'modal',
'button',
'form',
'input',
'dropdown',
'menu',
'panel',
'widget',
],
error: ['error', 'exception', 'fail', 'timeout', 'undefined', 'null', 'invalid', 'missing'],
feature: ['feature', 'functionality', 'capability', 'option', 'setting', 'preference'],
data: ['data', 'value', 'field', 'record', 'entry', 'item', 'list', 'table'],
action: ['click', 'type', 'submit', 'save', 'delete', 'update', 'create', 'load'],
state: [
'state',
'status',
'loading',
'saving',
'enabled',
'disabled',
'active',
'selected',
],
}
  // Emotional tone indicators. 'neutral' is only emitted as the fallback in
  // detectEmotionalTones, and 'satisfied' is declared in EmotionalTone but
  // has no detector here yet.
private readonly emotionalIndicators = {
frustrated: {
words: ['frustrating', 'annoying', 'irritating', 'ugh', 'argh', 'seriously'],
patterns: [/!+/, /\?{2,}/, /\.{3,}/],
repetition: ['still', 'again', 'keeps', 'always'],
},
confused: {
words: ['confused', 'unclear', 'weird', 'strange', 'odd', 'hmm'],
patterns: [/\?+/, /what\s+the/, /i\s+don't\s+understand/i],
repetition: ['what', 'where', 'how', 'why'],
},
surprised: {
words: ['wow', 'oh', 'whoa', 'unexpected', 'suddenly', 'surprised'],
patterns: [/oh\s+my/, /what\s+the/, /didn't\s+expect/i],
repetition: [],
},
}
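  /**
   * Runs all six analysis passes over the transcript. Every helper below is
   * synchronous; the method is declared async, presumably so the public API
   * can later accommodate asynchronous work without a breaking change.
   */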
async analyzeTranscript(transcript: ProcessedTranscript): Promise<AudioAnalysisResult> {
const verbalIssues = this.extractVerbalIssues(transcript)
const speakerIntents = this.detectSpeakerIntents(transcript)
const keyPhrases = this.extractKeyPhrases(transcript)
const emotionalTone = this.analyzeEmotionalTone(transcript)
const technicalTerms = this.extractTechnicalTerms(transcript)
const problemStatements = this.extractProblemStatements(transcript)
return {
verbalIssues,
speakerIntents,
keyPhrases,
emotionalTone,
technicalTerms,
problemStatements,
}
}
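  /**
   * Scans each segment against the issue lexicons. Confidence is
   * min(matchCount * typeWeight * 0.3, 1); e.g. two bug-phrase hits score
   * 2 * 0.9 * 0.3 = 0.54. Nearby same-type hits are merged afterwards.
   */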
private extractVerbalIssues(transcript: ProcessedTranscript): VerbalIssue[] {
const issues: VerbalIssue[] = []
for (const segment of transcript.segments) {
      const text = segment.text.toLowerCase()
      // Trim leading/trailing punctuation so tokens like "broken." or "bug," still match
      const words = text.split(/\s+/).map((w) => w.replace(/^[^\w']+|[^\w']+$/g, ''))
// Check each issue type
for (const [type, config] of Object.entries(this.issueIndicators)) {
const matches = this.findIssueMatches(text, words, config.phrases)
if (matches.length > 0) {
const severity = this.calculateSeverity(
text,
matches,
type as VerbalIssue['type']
)
const confidence = Math.min(matches.length * config.weight * 0.3, 1)
issues.push({
timestamp: segment.startTime,
text: segment.text,
type: type as VerbalIssue['type'],
severity,
confidence,
keywords: matches,
context: this.extractContext(transcript, segment),
})
}
}
}
// Merge nearby issues and boost confidence
return this.mergeNearbyIssues(issues)
}
private findIssueMatches(text: string, words: string[], phrases: string[]): string[] {
const matches: string[] = []
for (const phrase of phrases) {
if (phrase.includes(' ')) {
// Multi-word phrase
if (text.includes(phrase)) {
matches.push(phrase)
}
} else {
// Single word
if (words.includes(phrase)) {
matches.push(phrase)
}
}
}
return [...new Set(matches)]
}
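  /**
   * Additive severity score with thresholds at 0.8/0.6/0.4. Worked example:
   * "this is broken, not working!" as a 'bug' issue scores
   * 2 matches * 0.2 + 0.2 (exclamation) + 0.2 (bug type) = 0.8, i.e. 'critical'.
   */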
private calculateSeverity(
text: string,
matches: string[],
type: VerbalIssue['type']
): VerbalIssue['severity'] {
let score = matches.length * 0.2
// Boost for emphasis
if (text.includes('!')) score += 0.2
if (text.includes('really') || text.includes('very')) score += 0.15
if (text.includes('completely') || text.includes('totally')) score += 0.2
if (text.match(/\b(critical|severe|major|blocking)\b/)) score += 0.3
// Boost for certain issue types
if (type === 'bug' || type === 'frustration') score += 0.2
// Check for user impact mentions
if (text.match(/\b(can't|cannot|unable|impossible|prevents)\b/)) score += 0.25
if (text.match(/\b(users?|customers?|everyone|team)\b/)) score += 0.2
if (score >= 0.8) return 'critical'
if (score >= 0.6) return 'high'
if (score >= 0.4) return 'medium'
return 'low'
}
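  /**
   * Classifies what the speaker is doing via regex patterns. Confidence is a
   * flat 0.8 because a pattern hit is a strong but not certain signal. A
   * segment can match several intents and yields one entry per matched intent.
   */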
private detectSpeakerIntents(transcript: ProcessedTranscript): SpeakerIntent[] {
const intents: SpeakerIntent[] = []
const intentPatterns = {
reporting: [
/i\s+found\s+(?:a\s+)?(?:bug|issue|problem)/i,
/there's\s+(?:a\s+)?(?:bug|issue|problem)/i,
/(?:this|it)\s+(?:is|seems)\s+broken/i,
/reporting\s+(?:a|an|this)/i,
],
explaining: [
/what\s+(?:happens|happened)\s+(?:is|was)/i,
/let\s+me\s+(?:show|explain)/i,
/here's\s+what\s+(?:i|we)\s+(?:see|saw)/i,
/the\s+(?:issue|problem)\s+is/i,
],
demonstrating: [
/(?:i'm|i\s+am)\s+(?:going\s+to|gonna)\s+(?:show|click|type)/i,
/watch\s+what\s+happens/i,
/(?:see|look)\s+(?:here|at\s+this)/i,
/notice\s+(?:how|what)/i,
],
questioning: [
/(?:why|how\s+come)\s+(?:is|does|doesn't)/i,
/is\s+(?:this|it)\s+supposed\s+to/i,
/should\s+(?:this|it)\s+be/i,
/what's\s+going\s+on/i,
],
troubleshooting: [
/(?:i|we)\s+(?:tried|attempted)/i,
/(?:doesn't|won't)\s+work\s+(?:when|if)/i,
/(?:only|always)\s+happens\s+(?:when|if)/i,
/(?:refresh|reload|restart)(?:ing|ed)?/i,
],
}
for (const segment of transcript.segments) {
for (const [intent, patterns] of Object.entries(intentPatterns)) {
for (const pattern of patterns) {
if (pattern.test(segment.text)) {
intents.push({
timestamp: segment.startTime,
intent: intent as SpeakerIntent['intent'],
confidence: 0.8,
segment: segment.text,
})
break
}
}
}
}
return intents
}
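  /**
   * Builds 2-4 word n-grams per segment (after stripping punctuation and
   * dropping words under 3 characters), counts repeats across the whole
   * transcript, and keeps the 20 highest-importance phrases seen at least twice.
   */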
private extractKeyPhrases(transcript: ProcessedTranscript): KeyPhrase[] {
const phraseMap = new Map<string, { count: number; timestamps: number[] }>()
// Extract 2-4 word phrases
for (const segment of transcript.segments) {
const words = segment.text
.toLowerCase()
.replace(/[^\w\s]/g, '')
.split(/\s+/)
.filter((w) => w.length > 2)
// Generate n-grams
for (let n = 2; n <= 4; n++) {
for (let i = 0; i <= words.length - n; i++) {
const phrase = words.slice(i, i + n).join(' ')
// Skip common phrases
if (this.isCommonPhrase(phrase)) continue
const existing = phraseMap.get(phrase) || { count: 0, timestamps: [] }
existing.count++
existing.timestamps.push(segment.startTime)
phraseMap.set(phrase, existing)
}
}
}
// Convert to KeyPhrase array and calculate importance
const keyPhrases: KeyPhrase[] = []
for (const [phrase, data] of phraseMap.entries()) {
if (data.count >= 2) {
// Only include repeated phrases
keyPhrases.push({
phrase,
frequency: data.count,
timestamps: data.timestamps,
importance: this.calculatePhraseImportance(phrase, data.count),
})
}
}
return keyPhrases.sort((a, b) => b.importance - a.importance).slice(0, 20)
}
  private isCommonPhrase(phrase: string): boolean {
    // Stopword bigrams to skip. extractKeyPhrases drops words shorter than
    // 3 characters before building n-grams, so the entries containing 'to',
    // 'in', 'on', 'at', or 'of' are defensive and will rarely match.
    const common = [
'going to',
'want to',
'need to',
'have to',
'used to',
'able to',
'trying to',
'supposed to',
'the the',
'and the',
'in the',
'on the',
'at the',
'to the',
'of the',
]
return common.includes(phrase)
}
private calculatePhraseImportance(phrase: string, frequency: number): number {
let importance = frequency * 0.2
// Boost technical phrases
if (phrase.match(/\b(error|bug|issue|problem|fail)\b/)) importance += 0.3
if (phrase.match(/\b(component|function|api|endpoint)\b/)) importance += 0.2
if (phrase.match(/\b(user|customer|client)\b/)) importance += 0.2
// Boost action phrases
if (phrase.match(/\b(click|type|submit|save|load)\b/)) importance += 0.15
return Math.min(importance, 1)
}
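  /**
   * Emits at most one tone per segment: the strongest of the detected
   * candidates (frustrated / confused / surprised, or the neutral fallback).
   */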
private analyzeEmotionalTone(transcript: ProcessedTranscript): EmotionalTone[] {
const tones: EmotionalTone[] = []
for (const segment of transcript.segments) {
const text = segment.text.toLowerCase()
const detectedTones = this.detectEmotionalTones(text)
if (detectedTones.length > 0) {
// Use the strongest detected tone
const strongestTone = detectedTones.reduce((a, b) =>
a.intensity > b.intensity ? a : b
)
tones.push({
timestamp: segment.startTime,
tone: strongestTone.tone,
intensity: strongestTone.intensity,
indicators: strongestTone.indicators,
})
}
}
return tones
}
private detectEmotionalTones(
text: string
): Array<{ tone: EmotionalTone['tone']; intensity: number; indicators: string[] }> {
const detected: Array<{
tone: EmotionalTone['tone']
intensity: number
indicators: string[]
}> = []
for (const [tone, config] of Object.entries(this.emotionalIndicators)) {
const indicators: string[] = []
let intensity = 0
// Check words
for (const word of config.words) {
if (text.includes(word)) {
indicators.push(word)
intensity += 0.3
}
}
// Check patterns
for (const pattern of config.patterns) {
if (pattern.test(text)) {
indicators.push('pattern: ' + pattern.source)
intensity += 0.2
}
}
// Check repetition
for (const word of config.repetition) {
const count = (text.match(new RegExp(`\\b${word}\\b`, 'g')) || []).length
if (count >= 2) {
indicators.push(`repeated: ${word} (${count}x)`)
intensity += 0.1 * count
}
}
if (indicators.length > 0) {
detected.push({
tone: tone as EmotionalTone['tone'],
intensity: Math.min(intensity, 1),
indicators,
})
}
}
    // Fall back to neutral when nothing was detected or every candidate is weak
if (detected.length === 0 || detected.every((d) => d.intensity < 0.3)) {
detected.push({
tone: 'neutral',
intensity: 0.5,
indicators: ['no strong emotional indicators'],
})
}
return detected
}
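  /**
   * Buckets tokens into technical categories by substring match, so "buttons"
   * matches the 'component' indicator "button". A token lands in the first
   * category that matches (note the break), keyed as `${cleanWord}:${category}`
   * so repeat mentions accumulate under one term.
   */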
private extractTechnicalTerms(transcript: ProcessedTranscript): TechnicalTerm[] {
const termMap = new Map<string, TechnicalTerm>()
for (const segment of transcript.segments) {
const words = segment.text.split(/\s+/)
for (const word of words) {
const cleanWord = word.toLowerCase().replace(/[^\w]/g, '')
if (cleanWord.length < 3) continue
// Check each category
for (const [category, indicators] of Object.entries(this.technicalIndicators)) {
if (indicators.some((ind) => cleanWord.includes(ind))) {
const key = `${cleanWord}:${category}`
const existing = termMap.get(key)
if (existing) {
existing.occurrences.push({
timestamp: segment.startTime,
context: this.extractWordContext(segment.text, word),
})
} else {
termMap.set(key, {
                term: cleanWord, // normalized token so "Error" and "error." collapse into one entry
category: category as TechnicalTerm['category'],
occurrences: [
{
timestamp: segment.startTime,
context: this.extractWordContext(segment.text, word),
},
],
})
}
break
}
}
}
}
return Array.from(termMap.values())
}
private extractWordContext(text: string, word: string): string {
const index = text.toLowerCase().indexOf(word.toLowerCase())
if (index === -1) return text
const start = Math.max(0, index - 30)
const end = Math.min(text.length, index + word.length + 30)
let context = text.substring(start, end)
if (start > 0) context = '...' + context
if (end < text.length) context = context + '...'
return context
}
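  /**
   * Pulls structured problem statements out of free-form speech. The first
   * pattern that matches a segment wins; two-group patterns populate the
   * expected/actual fields, and a separate pass looks for user impact.
   */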
private extractProblemStatements(transcript: ProcessedTranscript): ProblemStatement[] {
const statements: ProblemStatement[] = []
// Patterns for problem statements
const problemPatterns = [
// Expected vs Actual
/(?:expected|thought|should)\s+(?:to\s+)?(.+?)\s+but\s+(?:got|received|see|shows?)\s+(.+)/i,
/(?:supposed\s+to)\s+(.+?)\s+(?:but|instead)\s+(?:it\s+)?(.+)/i,
// When/Then patterns
/when\s+(?:i|we|you)\s+(.+?)[,.]?\s+(?:then\s+)?(?:it|the)\s+(.+)/i,
/(?:if|after)\s+(?:i|we|you)\s+(.+?)[,.]?\s+(?:it|the)\s+(.+)/i,
// Direct problem statements
/(?:the\s+)?(?:problem|issue)\s+is\s+(?:that\s+)?(.+)/i,
/(?:it's|its)\s+not\s+(.+?)\s+(?:properly|correctly|right)/i,
/(?:can't|cannot|unable\s+to)\s+(.+)/i,
]
for (const segment of transcript.segments) {
for (const pattern of problemPatterns) {
const match = segment.text.match(pattern)
if (match) {
const statement: ProblemStatement = {
timestamp: segment.startTime,
statement: segment.text,
}
          // Capture both clauses when present. For the when/then patterns these
          // are really trigger vs. result, stored in the same expected/actual fields.
if (match[1] && match[2]) {
statement.expectedBehavior = match[1].trim()
statement.actualBehavior = match[2].trim()
}
// Look for user impact
const impactMatch = segment.text.match(
/(?:this\s+)?(?:prevents?|blocks?|stops?)\s+(?:me|us|users?)\s+(?:from\s+)?(.+)/i
)
if (impactMatch) {
statement.userImpact = impactMatch[1].trim()
}
statements.push(statement)
break
}
}
}
return statements
}
private extractContext(
transcript: ProcessedTranscript,
currentSegment: TranscriptSegment
): string {
const index = transcript.segments.indexOf(currentSegment)
const contextSegments: string[] = []
// Get previous segment
if (index > 0) {
contextSegments.push(transcript.segments[index - 1].text)
}
// Current segment
contextSegments.push(currentSegment.text)
// Get next segment
if (index < transcript.segments.length - 1) {
contextSegments.push(transcript.segments[index + 1].text)
}
return contextSegments.join(' ')
}
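  /**
   * Collapses same-type issues whose timestamps fall within 10 seconds of an
   * earlier issue. The merged entry takes the union of keywords, the maximum
   * severity, and the average confidence boosted by 1.2x (capped at 1), since
   * a repeated complaint is stronger evidence than a one-off remark.
   */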
private mergeNearbyIssues(issues: VerbalIssue[]): VerbalIssue[] {
if (issues.length <= 1) return issues
const merged: VerbalIssue[] = []
const used = new Set<number>()
for (let i = 0; i < issues.length; i++) {
if (used.has(i)) continue
const current = issues[i]
const nearby: VerbalIssue[] = [current]
// Find issues within 10 seconds
for (let j = i + 1; j < issues.length; j++) {
if (used.has(j)) continue
const other = issues[j]
if (
Math.abs(other.timestamp - current.timestamp) <= 10 &&
other.type === current.type
) {
nearby.push(other)
used.add(j)
}
}
if (nearby.length > 1) {
// Merge the issues
const allKeywords = [...new Set(nearby.flatMap((n) => n.keywords))]
const maxSeverity = nearby.reduce(
(max, n) => (this.compareSeverity(max, n.severity) > 0 ? max : n.severity),
current.severity
)
const avgConfidence =
nearby.reduce((sum, n) => sum + n.confidence, 0) / nearby.length
merged.push({
...current,
keywords: allKeywords,
severity: maxSeverity,
confidence: Math.min(avgConfidence * 1.2, 1), // Boost confidence for repeated issues
context: nearby.map((n) => n.text).join(' '),
})
} else {
merged.push(current)
}
}
return merged
}
private compareSeverity(a: VerbalIssue['severity'], b: VerbalIssue['severity']): number {
const order = { critical: 4, high: 3, medium: 2, low: 1 }
return order[a] - order[b]
}
  // Correlate verbal issues with visual events: for each issue, pick the visual
  // event within a 5-second window that scores highest on temporal proximity
  // (1 / (1 + |dt|)), boosted for matching types and shared keywords.
correlateWithVisualEvents(
audioAnalysis: AudioAnalysisResult,
visualTimestamps: Array<{ timestamp: number; type: string; content: string }>
): Array<{
verbal: VerbalIssue
visual: (typeof visualTimestamps)[0] | null
correlation: number
}> {
const correlations: Array<{
verbal: VerbalIssue
visual: (typeof visualTimestamps)[0] | null
correlation: number
}> = []
for (const issue of audioAnalysis.verbalIssues) {
// Find visual events within 5 seconds
const nearbyVisual = visualTimestamps.filter(
(v) => Math.abs(v.timestamp - issue.timestamp) <= 5
)
if (nearbyVisual.length > 0) {
// Find best match
let bestMatch = nearbyVisual[0]
let bestScore = 0
for (const visual of nearbyVisual) {
let score = 1 / (1 + Math.abs(visual.timestamp - issue.timestamp))
// Boost score for matching types
if (issue.type === 'bug' && visual.type === 'error') score *= 2
if (issue.type === 'confusion' && visual.type === 'navigation') score *= 1.5
if (issue.keywords.some((k) => visual.content.toLowerCase().includes(k)))
score *= 1.5
if (score > bestScore) {
bestScore = score
bestMatch = visual
}
}
correlations.push({
verbal: issue,
visual: bestMatch,
correlation: bestScore,
})
} else {
correlations.push({
verbal: issue,
visual: null,
correlation: 0,
})
}
}
return correlations
}
}
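
// Minimal usage sketch (hypothetical data, assuming ProcessedTranscript is
// shaped like { segments: Array<{ startTime: number; endTime: number; text: string }> };
// adjust the cast to whatever ./videoAnalysis actually exports):
//
//   const analyzer = new AudioAnalyzer()
//   const transcript = {
//     segments: [
//       { startTime: 0, endTime: 4, text: "When I click save, the form is broken!" },
//       { startTime: 5, endTime: 9, text: "It's broken again, this is so frustrating." },
//     ],
//   } as ProcessedTranscript
//   const result = await analyzer.analyzeTranscript(transcript) // inside an async function
//   console.log(result.verbalIssues, result.problemStatements)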