/**
* Audio Pipeline Utilities for glin-profanity
*
* Provides utilities for checking profanity in audio content.
* This module does NOT include speech-to-text - users bring their own
* transcription from Whisper, Google STT, Azure Speech, etc.
*
* @example
* ```typescript
* import { createAudioPipeline } from 'glin-profanity/audio';
* import OpenAI from 'openai';
*
* const openai = new OpenAI();
* const pipeline = createAudioPipeline({
* transcriber: async (audioBuffer) => {
* const response = await openai.audio.transcriptions.create({
* file: audioBuffer,
* model: 'whisper-1',
* });
* return response.text;
* },
* });
*
* const result = await pipeline.checkAudio(audioFile);
* console.log(result.containsProfanity);
* ```
*
* @packageDocumentation
* @module glin-profanity/audio
*/
import { Filter } from '../filters/Filter';
import type { FilterConfig, Language, CheckProfanityResult } from '../types/types';
/**
 * Transcription function type.
 *
 * Users provide their own transcription implementation (OpenAI Whisper,
 * Google STT, Azure Speech, etc.). Receives the raw audio input and
 * resolves to the transcribed text.
 */
export type TranscriberFunction = (audio: AudioInput) => Promise<string>;
/**
 * Audio input types accepted by the pipeline.
 *
 * A `string` is treated as a file path; the built-in transcribers in this
 * module currently reject string inputs (browser-safe behavior) — pass
 * binary data (Buffer/Uint8Array/Blob/File) instead.
 */
export type AudioInput = Buffer | Uint8Array | Blob | File | string;
/**
 * Audio pipeline configuration.
 */
export interface AudioPipelineConfig {
  /** Custom transcription function (REQUIRED) */
  transcriber: TranscriberFunction;
  /** Languages for profanity detection (default: `['english']`) */
  languages?: Language[];
  /** Enable leetspeak detection (default: `true`) */
  detectLeetspeak?: boolean;
  /** Enable Unicode normalization (default: `true`) */
  normalizeUnicode?: boolean;
  /**
   * Custom filter configuration. Spread over the pipeline defaults last,
   * so any key set here overrides the options above.
   */
  filterConfig?: Partial<FilterConfig>;
}
/**
 * Audio check result returned by `checkAudio`.
 */
export interface AudioCheckResult {
  /** Whether profanity was found */
  containsProfanity: boolean;
  /** Transcribed text from audio */
  transcribedText: string;
  /** Profane words found */
  profaneWords: string[];
  /** Full profanity check result */
  profanityResult: CheckProfanityResult;
  /** Total processing time in milliseconds (transcription + profanity check) */
  processingTimeMs: number;
  /** Time spent in the user-supplied transcriber, in milliseconds */
  transcriptionTimeMs: number;
  /** Time spent in the profanity check, in milliseconds */
  checkTimeMs: number;
}
/**
 * Segment result for timestamped audio.
 *
 * `startTime`/`endTime`/`text` are echoed unchanged from the input segment;
 * only the profanity fields are computed.
 */
export interface AudioSegmentResult {
  /** Segment index (position in the input array) */
  index: number;
  /** Start time in seconds */
  startTime: number;
  /** End time in seconds */
  endTime: number;
  /** Transcribed text for this segment */
  text: string;
  /** Whether this segment contains profanity */
  containsProfanity: boolean;
  /** Profane words in this segment */
  profaneWords: string[];
}
/**
* Creates an audio profanity checking pipeline
*
* @example
* ```typescript
* // With OpenAI Whisper
* const pipeline = createAudioPipeline({
* transcriber: async (audio) => {
* const formData = new FormData();
* formData.append('file', audio);
* formData.append('model', 'whisper-1');
*
* const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
* method: 'POST',
* headers: { 'Authorization': `Bearer ${process.env.OPENAI_API_KEY}` },
* body: formData,
* });
* const data = await response.json();
* return data.text;
* },
* });
*
* // With Google Cloud Speech-to-Text
* const pipeline = createAudioPipeline({
* transcriber: async (audio) => {
* // Your Google STT implementation
* return transcribedText;
* },
* });
* ```
*/
export function createAudioPipeline(config: AudioPipelineConfig) {
const {
transcriber,
languages = ['english'],
detectLeetspeak = true,
normalizeUnicode = true,
filterConfig = {},
} = config;
const filter = new Filter({
languages,
detectLeetspeak,
normalizeUnicode,
severityLevels: true,
cacheResults: true,
...filterConfig,
});
return {
/**
* Check audio for profanity
*/
async checkAudio(audio: AudioInput): Promise<AudioCheckResult> {
const startTime = Date.now();
// Transcribe
const transcribeStart = Date.now();
const transcribedText = await transcriber(audio);
const transcriptionTimeMs = Date.now() - transcribeStart;
// Check for profanity
const checkStart = Date.now();
const profanityResult = filter.checkProfanity(transcribedText);
const checkTimeMs = Date.now() - checkStart;
return {
containsProfanity: profanityResult.containsProfanity,
transcribedText,
profaneWords: profanityResult.profaneWords,
profanityResult,
processingTimeMs: Date.now() - startTime,
transcriptionTimeMs,
checkTimeMs,
};
},
/**
* Check multiple audio files
*/
async checkMultiple(audios: AudioInput[]): Promise<AudioCheckResult[]> {
return Promise.all(audios.map((audio) => this.checkAudio(audio)));
},
/**
* Check pre-transcribed text (if you already have transcription)
*/
checkTranscript(text: string): CheckProfanityResult {
return filter.checkProfanity(text);
},
/**
* Check timestamped segments (for Whisper with timestamps)
*
* @example
* ```typescript
* const segments = [
* { startTime: 0, endTime: 5, text: 'Hello everyone' },
* { startTime: 5, endTime: 10, text: 'This is a test' },
* ];
* const results = pipeline.checkSegments(segments);
* const flaggedSegments = results.filter(s => s.containsProfanity);
* ```
*/
checkSegments(
segments: Array<{ startTime: number; endTime: number; text: string }>
): AudioSegmentResult[] {
return segments.map((segment, index) => {
const result = filter.checkProfanity(segment.text);
return {
index,
startTime: segment.startTime,
endTime: segment.endTime,
text: segment.text,
containsProfanity: result.containsProfanity,
profaneWords: result.profaneWords,
};
});
},
/**
* Censor transcribed text
*/
censorTranscript(text: string, replacement = '***'): string {
const censorFilter = new Filter({
languages,
detectLeetspeak,
normalizeUnicode,
replaceWith: replacement,
});
const result = censorFilter.checkProfanity(text);
return result.processedText || text;
},
/**
* Get the underlying filter instance
*/
getFilter(): Filter {
return filter;
},
};
}
/**
 * Creates a transcriber function for the OpenAI Whisper API.
 *
 * @param config.apiKey - OpenAI API key (sent as a Bearer token).
 * @param config.model - Whisper model name (default: `'whisper-1'`).
 * @param config.baseUrl - API base URL (default: `'https://api.openai.com/v1'`).
 * @param config.language - Optional ISO language hint forwarded to the API.
 * @returns A {@link TranscriberFunction} that resolves to the transcript text.
 * @throws Error when given a string path, when the API responds non-2xx, or
 *   when the response body lacks a string `text` field.
 *
 * @example
 * ```typescript
 * const transcriber = createWhisperTranscriber({
 *   apiKey: process.env.OPENAI_API_KEY,
 *   model: 'whisper-1',
 * });
 *
 * const pipeline = createAudioPipeline({ transcriber });
 * ```
 */
export function createWhisperTranscriber(config: {
  apiKey: string;
  model?: string;
  baseUrl?: string;
  language?: string;
}): TranscriberFunction {
  const {
    apiKey,
    model = 'whisper-1',
    baseUrl = 'https://api.openai.com/v1',
    language,
  } = config;

  return async (audio: AudioInput): Promise<string> => {
    const formData = new FormData();

    // File extends Blob, so a single Blob check covers both — and avoids
    // referencing the `File` global, which is missing in older Node runtimes
    // (where `audio instanceof File` would throw a ReferenceError).
    if (audio instanceof Blob) {
      formData.append('file', audio);
    } else if (typeof audio === 'string') {
      // Assume it's a file path - this only works in Node.js
      throw new Error('File paths not supported in browser. Pass a Blob or File instead.');
    } else {
      // Buffer or Uint8Array (Buffer is a Uint8Array subclass). Copy into a
      // fresh Uint8Array so the Blob is backed by a plain ArrayBuffer
      // (not a SharedArrayBuffer or an offset view).
      const copy = new Uint8Array(audio);
      const blob = new Blob([copy], { type: 'audio/wav' });
      formData.append('file', blob, 'audio.wav');
    }

    formData.append('model', model);
    if (language) {
      formData.append('language', language);
    }

    const response = await fetch(`${baseUrl}/audio/transcriptions`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
      },
      body: formData,
    });

    if (!response.ok) {
      // Surface the API's error payload — status alone rarely explains why
      // a request failed (invalid key, unsupported format, quota, ...).
      const detail = await response.text().catch(() => '');
      throw new Error(
        `Whisper API error: ${response.status} ${response.statusText}${detail ? ` - ${detail}` : ''}`
      );
    }

    const data = (await response.json()) as { text?: unknown };
    if (typeof data.text !== 'string') {
      throw new Error('Whisper API returned an unexpected response shape (missing "text").');
    }
    return data.text;
  };
}
/**
 * Creates a transcriber function for Google Cloud Speech-to-Text.
 *
 * @param config.apiKey - Google API key (passed as a query parameter).
 * @param config.languageCode - BCP-47 language code (default: `'en-US'`).
 * @param config.enableAutomaticPunctuation - default: `true`.
 * @param config.profanityFilter - Google-side masking; default `false`
 *   because glin-profanity does its own filtering.
 * @returns A {@link TranscriberFunction} joining all result alternatives
 *   with spaces.
 * @throws Error when given a string path or when the API responds non-2xx.
 *
 * @example
 * ```typescript
 * const transcriber = createGoogleSTTTranscriber({
 *   apiKey: process.env.GOOGLE_API_KEY,
 *   languageCode: 'en-US',
 * });
 *
 * const pipeline = createAudioPipeline({ transcriber });
 * ```
 */
export function createGoogleSTTTranscriber(config: {
  apiKey: string;
  languageCode?: string;
  enableAutomaticPunctuation?: boolean;
  profanityFilter?: boolean;
}): TranscriberFunction {
  const {
    apiKey,
    languageCode = 'en-US',
    enableAutomaticPunctuation = true,
    profanityFilter = false, // We do our own filtering
  } = config;

  // Spreading a whole byte array into String.fromCharCode overflows the call
  // stack for large inputs, and audio payloads routinely exceed 100KB.
  // Use Buffer when available (Node), otherwise encode in bounded chunks.
  const bytesToBase64 = (bytes: Uint8Array): string => {
    if (typeof Buffer !== 'undefined') {
      return Buffer.from(bytes).toString('base64');
    }
    const CHUNK = 0x8000;
    let binary = '';
    for (let i = 0; i < bytes.length; i += CHUNK) {
      binary += String.fromCharCode(...bytes.subarray(i, i + CHUNK));
    }
    return btoa(binary);
  };

  return async (audio: AudioInput): Promise<string> => {
    // Convert input to base64 for the JSON request body.
    let audioContent: string;
    if (audio instanceof Blob) {
      audioContent = bytesToBase64(new Uint8Array(await audio.arrayBuffer()));
    } else if (typeof audio === 'string') {
      throw new Error('File paths not supported. Pass audio data directly.');
    } else {
      // Buffer or Uint8Array (Buffer is a Uint8Array subclass)
      audioContent = bytesToBase64(audio);
    }

    const response = await fetch(
      `https://speech.googleapis.com/v1/speech:recognize?key=${apiKey}`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          config: {
            // NOTE(review): encoding/sample rate are hard-coded; callers must
            // supply LINEAR16 @ 16kHz audio — confirm this matches your input.
            encoding: 'LINEAR16',
            sampleRateHertz: 16000,
            languageCode,
            enableAutomaticPunctuation,
            profanityFilter,
          },
          audio: { content: audioContent },
        }),
      }
    );

    if (!response.ok) {
      // Include the API's error payload for actionable diagnostics.
      const detail = await response.text().catch(() => '');
      throw new Error(
        `Google STT API error: ${response.status} ${response.statusText}${detail ? ` - ${detail}` : ''}`
      );
    }

    const data = await response.json();
    const results = data.results || [];
    return results
      .map((r: { alternatives?: Array<{ transcript?: string }> }) =>
        r.alternatives?.[0]?.transcript || ''
      )
      .join(' ');
  };
}
/**
 * Real-time audio stream checker.
 * For live audio moderation (e.g., voice chat, podcasts).
 *
 * NOTE(review): `bufferDurationMs` is accepted but never read — each chunk
 * is checked exactly as it is passed in. Confirm whether time-based
 * buffering was intended before relying on this option.
 *
 * @example
 * ```typescript
 * const streamChecker = createRealtimeChecker({
 *   transcriber: myTranscriber,
 *   onProfanityDetected: (result) => {
 *     console.log('Profanity detected:', result.profaneWords);
 *     // Trigger beep, mute, or warning
 *   },
 * });
 *
 * // Feed audio chunks as they arrive
 * audioStream.on('data', (chunk) => {
 *   streamChecker.processChunk(chunk);
 * });
 *
 * // Get summary when done
 * const summary = streamChecker.getSummary();
 * ```
 */
export function createRealtimeChecker(config: {
  transcriber: TranscriberFunction;
  onProfanityDetected?: (result: AudioCheckResult) => void;
  bufferDurationMs?: number;
  languages?: Language[];
  detectLeetspeak?: boolean;
}) {
  const notify = config.onProfanityDetected;

  const pipeline = createAudioPipeline({
    transcriber: config.transcriber,
    languages: config.languages ?? ['english'],
    detectLeetspeak: config.detectLeetspeak ?? true,
  });

  // Per-session state: every chunk result, plus the flagged words
  // accumulated across flagged chunks (duplicates kept until summarized).
  const history: AudioCheckResult[] = [];
  let flaggedWords: string[] = [];

  return {
    /**
     * Transcribe and check a single audio chunk, recording the result and
     * invoking the profanity callback when the chunk is flagged.
     */
    async processChunk(chunk: AudioInput): Promise<AudioCheckResult> {
      const result = await pipeline.checkAudio(chunk);
      history.push(result);
      if (result.containsProfanity) {
        flaggedWords = flaggedWords.concat(result.profaneWords);
        notify?.(result);
      }
      return result;
    },

    /**
     * Aggregate statistics over every chunk processed since the last reset.
     */
    getSummary() {
      let flagged = 0;
      for (const r of history) {
        if (r.containsProfanity) {
          flagged += 1;
        }
      }
      return {
        totalChunks: history.length,
        flaggedChunks: flagged,
        cleanChunks: history.length - flagged,
        flagRate: history.length > 0 ? flagged / history.length : 0,
        allProfaneWords: Array.from(new Set(flaggedWords)),
        fullTranscript: history.map((r) => r.transcribedText).join(' '),
      };
    },

    /**
     * Reset the checker state (clears recorded chunks and flagged words).
     */
    reset() {
      history.length = 0;
      flaggedWords = [];
    },
  };
}
export type { CheckProfanityResult, FilterConfig, Language };