Fonoster MCP Server

Official

Overview Schema Related Servers Score Discussions

Deepgram.ts•10.6 KiB

/**
 * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
 * http://github.com/fonoster/fonoster
 *
 * This file is part of Fonoster
 *
 * Licensed under the MIT License (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    https://opensource.org/licenses/MIT
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import { Stream } from "stream";
import { Messages, VoiceLanguage } from "@fonoster/common";
import { getLogger } from "@fonoster/logger";
import * as z from "zod";
import { SpeechToText } from "../types";
import { AbstractSpeechToText } from "./AbstractSpeechToText";
import {
  DeepgramModel,
  DeepgramSttConfig,
  SpeechResult,
  StreamSpeech
} from "./types";

const {
  DeepgramClient,
  LiveTranscriptionEvents,
  createClient
  // eslint-disable-next-line @typescript-eslint/no-var-requires
} = require("@deepgram/sdk"); // Why Deepgram :(

const ENGINE_NAME = "stt.deepgram";
const logger = getLogger({ service: "apiserver", filePath: __filename });

/**
 * Speech-to-text engine backed by Deepgram live transcription.
 *
 * Offers a streaming mode (`streamTranscribe`) that emits every finalized
 * utterance, and a one-shot mode (`transcribe`) that resolves with the first
 * utterance flagged `speech_final`.
 */
class Deepgram
  extends AbstractSpeechToText<typeof ENGINE_NAME>
  implements SpeechToText
{
  client: typeof DeepgramClient;
  engineConfig: DeepgramSttConfig;
  readonly engineName = ENGINE_NAME;

  /**
   * @param config - Engine configuration; `config.credentials.apiKey` is used
   *   to create the Deepgram client.
   */
  constructor(config: DeepgramSttConfig) {
    super(config);
    this.client = createClient(config.credentials.apiKey);
    this.engineConfig = config;
  }

  /**
   * Opens a live Deepgram connection, forwards the audio chunks of `stream`
   * to it, and returns a stream that emits `"data"` events shaped as
   * `{ speech, responseTime }` for each finalized transcript.
   *
   * @param stream - Source of audio chunks.
   * @returns A stream emitting `"data"` per utterance, `"error"` on Deepgram
   *   connection errors, and `"end"` after an input-stream error.
   */
  streamTranscribe(stream: Stream): StreamSpeech {
    const connection = this.client.listen.live(
      buildTranscribeConfig(this.engineConfig.config)
    );

    const out = new Stream();

    // Track last transcript for UtteranceEnd fallback
    // According to Deepgram docs: "If you receive an UtteranceEnd event without a
    // preceding speech_final: true, it's advisable to process the last-received
    // transcript as a complete utterance."
    // UtteranceEnd fires after finalized words, so we store the last finalized transcript
    // but also keep any transcript as a fallback
    let lastFinalizedTranscript: string | null = null;
    let lastFinalizedTranscriptTime = 0;
    let lastAnyTranscript: string | null = null;
    let lastAnyTranscriptTime = 0;

    // Add error handler immediately to catch any connection errors
    connection.on(LiveTranscriptionEvents.Error, (err) => {
      logger.error("error on Deepgram connection", { err });

      // Emit error properly for handling upstream
      out.emit("error", new Error("Speech recognition service error"));

      try {
        connection.destroy();
      } catch (destroyErr) {
        logger.error("error destroying connection", { destroyErr });
      }
    });

    connection.on(LiveTranscriptionEvents.Open, () => {
      // Audio is only forwarded once the connection reports it is open
      stream.on("data", (chunk) => {
        try {
          connection.send(chunk);
        } catch (err) {
          logger.error("error sending chunk to Deepgram", { err });
        }
      });

      connection.on(LiveTranscriptionEvents.Transcript, (data) => {
        const transcript = data.channel?.alternatives?.[0]?.transcript;
        const hasTranscript = !!transcript;
        const isFinal = data.is_final === true;
        const speechFinal = data.speech_final === true;

        // Store any transcript for UtteranceEnd fallback
        if (hasTranscript) {
          lastAnyTranscript = transcript;
          lastAnyTranscriptTime = Date.now();

          // Store finalized transcripts separately (preferred for UtteranceEnd)
          if (isFinal || speechFinal) {
            lastFinalizedTranscript = transcript;
            lastFinalizedTranscriptTime = Date.now();
          }
        }

        // Process transcript if it has content and is final
        // Check both speech_final (primary) and is_final (backup)
        if (!hasTranscript || (!speechFinal && !isFinal)) {
          return;
        }

        const words = data.channel.alternatives[0].words || [];

        // Mean spoken duration per word, converted from seconds to milliseconds
        const responseTime =
          words.length > 0
            ? (words.reduce(
                (acc: number, word: { start: number; end: number }) =>
                  acc + (word.end - word.start),
                0
              ) *
                1000) /
              words.length
            : 0;

        logger.verbose("transcribe result", {
          speech: transcript,
          responseTime,
          isFinal,
          speechFinal
        });

        out.emit("data", {
          speech: transcript,
          responseTime
        });

        // Clear transcripts after processing (they've been emitted)
        lastFinalizedTranscript = null;
        lastAnyTranscript = null;
      });

      // CRITICAL: Handle UtteranceEnd events (fallback when speech_final never becomes true)
      // This is Deepgram's recommended fallback mechanism for noisy environments
      // UtteranceEnd requires: interim_results=true and utterance_end_ms parameter
      // UtteranceEnd fires after finalized words, so prefer lastFinalizedTranscript
      connection.on(LiveTranscriptionEvents.UtteranceEnd, (data) => {
        // Prefer finalized transcript, fall back to any transcript
        const transcriptToUse = lastFinalizedTranscript || lastAnyTranscript;
        const transcriptTime = lastFinalizedTranscript
          ? lastFinalizedTranscriptTime
          : lastAnyTranscriptTime;

        if (transcriptToUse) {
          // Use last_word_end from UtteranceEnd event if available for more accurate timing
          // Otherwise fall back to time since last transcript
          const lastWordEnd = data?.last_word_end;
          const responseTime = lastWordEnd
            ? lastWordEnd * 1000 // Convert seconds to milliseconds
            : transcriptTime
              ? Date.now() - transcriptTime
              : 0;

          logger.info("Deepgram UtteranceEnd - processing last transcript", {
            speech: transcriptToUse,
            responseTime,
            lastWordEnd: lastWordEnd,
            wasFinalized: !!lastFinalizedTranscript
          });

          out.emit("data", {
            speech: transcriptToUse,
            responseTime
          });

          // Clear transcripts after processing
          lastFinalizedTranscript = null;
          lastAnyTranscript = null;
        } else {
          logger.warn(
            "Deepgram UtteranceEnd received but no last transcript available"
          );
        }
      });
    });

    // Handle stream errors and cleanup
    stream.on("error", (err) => {
      logger.warn("error on input stream", { err });

      // Instead of emitting an error, just end the stream with a message
      out.emit("data", {
        speech: "Error with audio input stream",
        responseTime: 0
      });
      out.emit("end");

      try {
        connection.destroy();
      } catch (destroyErr) {
        logger.warn("error destroying connection", { destroyErr });
      }
    });

    stream.on("end", () => {
      try {
        connection.destroy();
      } catch (err) {
        logger.error("error destroying connection on stream end", { err });
      }
    });

    return out;
  }

  /**
   * Transcribes a single utterance: resolves with the first non-empty
   * transcript that Deepgram flags as `speech_final`.
   *
   * @param stream - Source of audio chunks.
   * @returns The recognized speech with `isFinal: true` and the elapsed time
   *   in milliseconds since this method was called.
   * @throws Rejects with the Deepgram connection error or the input-stream
   *   error, whichever happens first.
   */
  async transcribe(stream: Stream): Promise<SpeechResult> {
    return new Promise((resolve, reject) => {
      const startTime = performance.now();

      const connection = this.client.listen.live(
        buildTranscribeConfig(this.engineConfig.config)
      );

      // Best-effort teardown shared by every exit path; a failing destroy
      // must never escape an event handler
      const destroyConnection = () => {
        try {
          connection.destroy();
        } catch (destroyErr) {
          logger.error("error destroying connection", { destroyErr });
        }
      };

      // Registered immediately (not inside Open) so that a connection which
      // fails before opening still rejects instead of leaving the caller
      // waiting forever
      connection.on(LiveTranscriptionEvents.Error, (err) => {
        logger.warn("error on Deepgram connection", { err });
        reject(err);
        destroyConnection();
      });

      stream.on("data", (chunk) => {
        try {
          connection.send(chunk);
        } catch (err) {
          logger.error("error sending chunk to Deepgram", { err });
        }
      });

      connection.on(LiveTranscriptionEvents.Open, () => {
        connection.on(LiveTranscriptionEvents.Transcript, (data) => {
          // Optional chaining: a payload without alternatives is ignored
          // rather than throwing inside the handler
          const transcript = data.channel?.alternatives?.[0]?.transcript;

          if (!transcript || !data.speech_final) {
            return;
          }

          const result: SpeechResult = {
            speech: transcript,
            isFinal: true,
            responseTime: performance.now() - startTime
          };

          logger.verbose("transcribe result", {
            speech: result.speech,
            responseTime: result.responseTime
          });

          resolve(result);
          destroyConnection();
        });
      });

      stream.on("end", () => {
        destroyConnection();
      });

      stream.on("error", (err) => {
        destroyConnection();
        reject(err);
      });
    });
  }

  /**
   * @returns The zod schema used to validate the engine's `config` section.
   */
  static getConfigValidationSchema(): z.Schema {
    return z.object({
      smartFormat: z.boolean().optional(),
      noDelay: z.boolean().optional(),
      languageCode: z
        .nativeEnum(VoiceLanguage, { message: Messages.VALID_LANGUAGE_CODE })
        .optional(),
      model: z
        .nativeEnum(DeepgramModel, { message: "Invalid Deepgram model" })
        .optional(),
      interimResults: z.boolean().optional(),
      utteranceEndMs: z.number().int().min(1000).max(5000).optional()
    });
  }

  /**
   * @returns The zod schema used to validate the engine's credentials.
   */
  static getCredentialsValidationSchema(): z.Schema {
    return z.object({
      apiKey: z.string()
    });
  }
}

/**
 * Builds the options object passed to `client.listen.live`.
 *
 * The incoming config is spread into the result, so its camelCase keys are
 * passed through alongside the snake_case keys set explicitly below.
 *
 * @param config - Engine config; unset optional flags default to enabled.
 * @returns Live-transcription options for 16 kHz, mono, linear16 audio.
 */
function buildTranscribeConfig(config: {
  model: DeepgramModel;
  languageCode: VoiceLanguage;
  smartFormat?: boolean;
  noDelay?: boolean;
  interimResults?: boolean;
  utteranceEndMs?: number;
}) {
  // UtteranceEnd requires interim_results to be enabled
  // Default to true to enable UtteranceEnd fallback mechanism
  const interimResults = config.interimResults !== false;

  // Default utterance_end_ms to 1000ms (minimum required value)
  // This enables UtteranceEnd events as a fallback when speech_final never becomes true
  // `||` is deliberate: a falsy value such as 0 also falls back to the default
  const utteranceEndMs = config.utteranceEndMs || 1000;

  return {
    ...config,
    model: config.model || DeepgramModel.NOVA_2_PHONECALL,
    language: config.languageCode || VoiceLanguage.EN_US,
    encoding: "linear16",
    sample_rate: 16000,
    channels: 1,
    smart_format: config.smartFormat !== false,
    // This needs to be set to true to avoid delays while using smart_format
    no_delay: config.noDelay !== false,
    // REQUIRED for UtteranceEnd: interim_results must be true
    interim_results: interimResults,
    // REQUIRED for UtteranceEnd: utterance_end_ms parameter
    utterance_end_ms: utteranceEndMs
  };
}

export { Deepgram, ENGINE_NAME };

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/fonoster/fonoster'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

Deepgram.ts•10.6 KiB