TranscriptionTools MCP Server
import { FileHandler } from '../utils/file-handler.js';
/**
* Interface for the format_transcript function parameters
*/
export interface FormatTranscriptParams {
input_text: string;
is_file_path?: boolean;
paragraph_gap?: number; // seconds
line_gap?: number; // seconds
}
/**
* Transforms timestamped transcripts into naturally formatted text
* @param params - Parameters for the formatting process
* @returns Object containing the formatted text
*/
export async function formatTranscript(params: FormatTranscriptParams): Promise<{ formatted_text: string }> {
try {
const {
input_text,
is_file_path = false,
paragraph_gap = 8, // default 8 seconds for paragraph breaks
line_gap = 4 // default 4 seconds for line breaks
} = params;
// Resolve content (either direct text or from file)
const textContent = await FileHandler.resolveTextContent(input_text, is_file_path);
// Parse the timestamped transcript
const lines = textContent.trim().split('\n');
// This will store our processed text segments with their timestamps
const segments: Array<{ time: number; text: string }> = [];
// Parse each line to extract timestamp and text
for (const line of lines) {
// Extract timestamp using regex
const match = line.match(/\[(\d{2}):(\d{2}):(\d{2})\]\s*(.*)/);
if (match) {
const hours = parseInt(match[1], 10);
const minutes = parseInt(match[2], 10);
const seconds = parseInt(match[3], 10);
const text = match[4].trim();
// Convert timestamp to seconds
const timeInSeconds = hours * 3600 + minutes * 60 + seconds;
// Add to segments
segments.push({ time: timeInSeconds, text });
} else {
// Handle lines without timestamps
if (segments.length > 0) {
// Append to the previous segment if it exists
segments[segments.length - 1].text += ' ' + line.trim();
} else {
// Create a new segment with time 0 if no previous segment
segments.push({ time: 0, text: line.trim() });
}
}
}
// Process segments to create naturally formatted text
let formattedText = '';
let lastTime = -1;
for (let i = 0; i < segments.length; i++) {
const segment = segments[i];
// First segment or determine spacing based on time gap
if (i === 0) {
formattedText = segment.text;
} else {
const timeGap = segment.time - lastTime;
// Rule 1: Paragraph break for gaps > paragraph_gap seconds
if (timeGap > paragraph_gap) {
formattedText += '\n\n' + segment.text;
}
// Rule 2: Line break for gaps > line_gap seconds
else if (timeGap > line_gap) {
formattedText += '\n' + segment.text;
}
// Rule 3: Apply natural grammar rules
else {
// Check if we should add space or join without space
const lastChar = formattedText.charAt(formattedText.length - 1);
const endsWithSentenceMarker = /[.!?]$/.test(formattedText);
const startsWithLowerCase = /^[a-z]/.test(segment.text);
if (endsWithSentenceMarker) {
// Start a new sentence
formattedText += ' ' + segment.text;
} else if (lastChar === ',' || lastChar === ';' || lastChar === ':') {
// Continue after punctuation
formattedText += ' ' + segment.text;
} else if (startsWithLowerCase) {
// Likely continuing a thought
formattedText += ' ' + segment.text;
} else {
// Otherwise just add space
formattedText += ' ' + segment.text;
}
}
}
lastTime = segment.time;
}
return { formatted_text: formattedText };
} catch (error) {
throw new Error(`Formatting process failed: ${error instanceof Error ? error.message : String(error)}`);
}
}