Limitless MCP Server

Overview Schema Related Servers Score Discussions

mcp-limitless
src

transcript-extraction.ts•16.5 KiB

import type { Lifelog, LifelogContentNode } from "./limitless-client.js";

// =============================================================================
// TRANSCRIPT EXTRACTION AND FORMATTING
// =============================================================================

/**
 * Caller-supplied switches for transcript extraction.
 *
 * Only `format`, `includeSpeakers`, `speakerFormat` and `preserveFormatting`
 * influence the output of the current implementation. `includeTimestamps`,
 * `includeContext` and `timeFormat` are accepted and forwarded to segment
 * extraction, but no code path reads them yet.
 */
export interface TranscriptOptions {
  format: "raw_text" | "verbatim" | "structured" | "timestamps" | "speakers_only";
  includeTimestamps?: boolean;
  includeSpeakers?: boolean;
  includeContext?: boolean;
  preserveFormatting?: boolean;
  timeFormat?: "offset" | "absolute" | "duration";
  speakerFormat?: "names" | "identifiers" | "both";
}

/** One flattened content node of a lifelog. */
export interface TranscriptSegment {
  speaker?: string;
  speakerIdentifier?: "user" | null;
  /** Trimmed text of the node. */
  content: string;
  startTime?: string;
  endTime?: string;
  startOffsetMs?: number;
  endOffsetMs?: number;
  /** `endOffsetMs - startOffsetMs`; only set when both offsets are present. */
  duration?: number;
  /** Node type copied from the source node (e.g. "blockquote", "heading…"). */
  type: string;
}

/** Full extraction result for a single lifelog. */
export interface DetailedTranscript {
  lifelogId: string;
  title: string;
  startTime: string;
  endTime: string;
  /** `endTime - startTime` in milliseconds (as computed via `Date#getTime`). */
  totalDuration: number;
  segments: TranscriptSegment[];
  metadata: {
    speakerCount: number;
    uniqueSpeakers: string[];
    wordCount: number;
    averageSegmentLength: number;
    technicalTermsFound: string[];
    numbersAndFigures: string[];
    keyPhrases: string[];
  };
  rawText: string;
  formattedTranscript: string;
}

/** Patterns used to spot technical vocabulary. Order determines result order. */
const TECHNICAL_TERM_PATTERNS: readonly RegExp[] = [
  // Scientific terms
  /\b[A-Z][a-z]+(?:ine|ase|oid|gen|ide|ate|ium|sis|tion|logy|graphy|metry|scopy)\b/g,
  // Medical terms
  /\b(?:diagnosis|treatment|therapy|syndrome|pathology|cardio|neuro|gastro|pulmonary|hepatic|renal|oncology|immunology)\w*\b/gi,
  // Technical abbreviations
  /\b[A-Z]{2,6}\b(?:\s*[A-Z]{2,6})*\b/g,
  // Chemical formulas and compounds
  /\b[A-Z][a-z]?(?:\d+[A-Za-z]*)*\b/g,
  // Units and measurements. The word boundary applies to alphabetic units
  // only: "%" is not a word character, so `%\b` would never match "95% ".
  /\b\d+(?:\.\d+)?\s*(?:(?:mg|kg|ml|cm|mm|km|hz|ghz|mb|gb|tb|fps|rpm|°[CF])\b|%)/gi,
  // Software/tech terms
  /\b(?:API|SDK|REST|GraphQL|JSON|XML|HTTP|HTTPS|SQL|NoSQL|CI\/CD|DevOps|ML|AI|GPU|CPU|RAM|SSD|IoT|VR|AR)\b/gi,
];

/** Patterns used to spot quantitative data. Order determines result order. */
const NUMBER_PATTERNS: readonly RegExp[] = [
  // Percentages (no trailing `\b`: it cannot hold between "%" and a space,
  // punctuation, or end of input, which is where percentages normally end)
  /\b\d+(?:\.\d+)?%/g,
  // Currency
  /\$\d+(?:,\d{3})*(?:\.\d{2})?\b|\b\d+(?:,\d{3})*(?:\.\d{2})?\s*(?:dollars?|USD|EUR|GBP)\b/gi,
  // Large numbers with commas
  /\b\d{1,3}(?:,\d{3})+(?:\.\d+)?\b/g,
  // Decimal numbers with context
  /\b\d+\.\d+\s*(?:million|billion|thousand|k)\b/gi,
  // Time/duration
  /\b\d+(?:\.\d+)?\s*(?:hours?|minutes?|seconds?|days?|weeks?|months?|years?)\b/gi,
  // Ratios and fractions
  /\b\d+:\d+\b|\b\d+\/\d+\b/g,
  // Scientific notation
  /\b\d+(?:\.\d+)?[eE][+-]?\d+\b/g,
  // Dates with specific formats
  /\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b/gi,
  /\b\d{1,2}\/\d{1,2}\/\d{2,4}\b/g,
  // Version numbers
  /\bv?\d+\.\d+(?:\.\d+)*\b/gi,
];

/** Patterns used to spot decision/action/finding sentences. */
const KEY_PHRASE_PATTERNS: readonly RegExp[] = [
  // Decision indicators
  /\b(?:we decided|decision was|agreed to|concluded that|determined that|final decision|going with|chose to)\s+[^.!?]*[.!?]/gi,
  // Action indicators
  /\b(?:action item|next step|follow up|we need to|I will|should do|must complete|deadline|by \w+day)\s+[^.!?]*[.!?]/gi,
  // Important qualifiers
  /\b(?:critical|essential|important|urgent|priority|significant|major|key|primary|main|core)\s+[^.!?]*[.!?]/gi,
  // Problem/solution indicators
  /\b(?:problem is|issue with|challenge|solution|fix|resolve|address)\s+[^.!?]*[.!?]/gi,
  // Research/findings
  /\b(?:study shows|research indicates|data suggests|findings|results|analysis|statistics|evidence)\s+[^.!?]*[.!?]/gi,
  // Quotes and references
  /(?:"[^"]*"|according to|as mentioned|referenced|cited|source)/gi,
];

/** Options forwarded from `extractRawTranscript` to segment extraction. */
interface SegmentExtractionOptions {
  includeTimestamps: boolean;
  includeSpeakers: boolean;
  includeContext: boolean;
  timeFormat: "offset" | "absolute" | "duration";
  speakerFormat: "names" | "identifiers" | "both";
}

export class TranscriptExtractor {
  /**
   * Build a {@link DetailedTranscript} from a single lifelog.
   *
   * @param lifelog - Lifelog to process. When `contents` is missing or empty an
   *   empty transcript (see `createEmptyTranscript`) is returned.
   * @param options - Extraction/formatting switches; defaults to the
   *   "structured" format with speaker names.
   * @returns Flattened segments, derived metadata, a raw-text rendition and a
   *   rendition in the requested `format`.
   */
  static extractRawTranscript(
    lifelog: Lifelog,
    options: TranscriptOptions = { format: "structured" }
  ): DetailedTranscript {
    const {
      format = "structured",
      includeTimestamps = true,
      includeSpeakers = true,
      includeContext = true,
      preserveFormatting = false,
      timeFormat = "absolute",
      speakerFormat = "names",
    } = options;

    if (!lifelog.contents || lifelog.contents.length === 0) {
      return this.createEmptyTranscript(lifelog);
    }

    // Flatten the content tree (parents before their children).
    const segments = this.extractSegments(lifelog.contents, {
      includeTimestamps,
      includeSpeakers,
      includeContext,
      timeFormat,
      speakerFormat,
    });

    // Derive technical terms, figures, key phrases and basic statistics.
    const metadata = this.analyzeContent(segments);

    const rawText = this.generateRawText(segments, preserveFormatting);
    const formattedTranscript = this.generateFormattedTranscript(segments, format, options);

    const totalDuration =
      new Date(lifelog.endTime).getTime() - new Date(lifelog.startTime).getTime();

    return {
      lifelogId: lifelog.id,
      title: lifelog.title || "Untitled Conversation",
      startTime: lifelog.startTime,
      endTime: lifelog.endTime,
      totalDuration,
      segments,
      metadata,
      rawText,
      formattedTranscript,
    };
  }

  /**
   * Flatten a content tree into segments, depth-first, parent before children.
   *
   * Nodes whose content is missing or whitespace-only produce no segment AND
   * their children are not visited (the `continue` skips the whole node).
   * Only `includeSpeakers` and `speakerFormat` are consulted here; the other
   * option fields are carried along unchanged for the recursive calls.
   */
  private static extractSegments(
    contents: LifelogContentNode[],
    options: SegmentExtractionOptions
  ): TranscriptSegment[] {
    const segments: TranscriptSegment[] = [];

    for (const node of contents) {
      if (!node.content || node.content.trim().length === 0) continue;

      // Every node type is kept, not just blockquotes.
      const segment: TranscriptSegment = {
        content: node.content.trim(),
        type: node.type,
        startTime: node.startTime,
        endTime: node.endTime,
        startOffsetMs: node.startOffsetMs,
        endOffsetMs: node.endOffsetMs,
      };

      // `!= null` rejects both undefined and null, so a null offset cannot
      // silently turn into a bogus numeric duration.
      if (node.startOffsetMs != null && node.endOffsetMs != null) {
        segment.duration = node.endOffsetMs - node.startOffsetMs;
      }

      // Speaker info is attached only when the node carries a speaker name.
      if (options.includeSpeakers && node.speakerName) {
        switch (options.speakerFormat) {
          case "names":
            segment.speaker = node.speakerName;
            break;
          case "identifiers":
            // Fall back to the name when no identifier is available.
            segment.speaker = node.speakerIdentifier || node.speakerName;
            break;
          case "both":
            segment.speaker = node.speakerName;
            segment.speakerIdentifier = node.speakerIdentifier;
            break;
        }
      }

      segments.push(segment);

      if (node.children && node.children.length > 0) {
        // Push one by one instead of `push(...children)`: spreading a very
        // large array as call arguments can exceed the engine's limit.
        for (const child of this.extractSegments(node.children, options)) {
          segments.push(child);
        }
      }
    }

    return segments;
  }

  /**
   * Compute transcript metadata from the flattened segments.
   *
   * `wordCount` is the number of whitespace-separated tokens across all
   * segment contents (0 when there is no text); `averageSegmentLength` is the
   * rounded words-per-segment figure.
   */
  private static analyzeContent(segments: TranscriptSegment[]): DetailedTranscript["metadata"] {
    const allText = segments.map(s => s.content).join(" ");

    // First-seen order is preserved by Set iteration.
    const uniqueSpeakers = Array.from(
      new Set(
        segments
          .map(s => s.speaker)
          .filter((name): name is string => Boolean(name))
      )
    );

    const technicalTermsFound = this.extractTechnicalTerms(allText);
    const numbersAndFigures = this.extractNumbersAndFigures(allText);
    const keyPhrases = this.extractKeyPhrases(allText);

    // "".split(/\s+/) yields [""] (length 1), so empty text is special-cased.
    const trimmedText = allText.trim();
    const wordCount = trimmedText.length === 0 ? 0 : trimmedText.split(/\s+/).length;
    const averageSegmentLength = segments.length > 0 ? wordCount / segments.length : 0;

    return {
      speakerCount: uniqueSpeakers.length,
      uniqueSpeakers,
      wordCount,
      averageSegmentLength: Math.round(averageSegmentLength),
      technicalTermsFound,
      numbersAndFigures,
      keyPhrases,
    };
  }

  /**
   * Run every pattern over `text` and return the de-duplicated results.
   *
   * @param text - Text to scan.
   * @param patterns - Global regexes, applied in order.
   * @param limit - Maximum number of entries returned (first found wins).
   * @param pick - Maps a raw match to the value to record, or `undefined`
   *   to discard the match.
   */
  private static collectMatches(
    text: string,
    patterns: readonly RegExp[],
    limit: number,
    pick: (match: string) => string | undefined
  ): string[] {
    const found = new Set<string>();
    for (const pattern of patterns) {
      // String#match with a /g regex resets lastIndex, so the shared
      // module-level regex objects are safe to reuse across calls.
      for (const match of text.match(pattern) ?? []) {
        const value = pick(match);
        if (value !== undefined) {
          found.add(value);
        }
      }
    }
    return Array.from(found).slice(0, limit);
  }

  /**
   * Find technical vocabulary (at most 20 distinct entries).
   * Matches of two characters or fewer are discarded as noise.
   */
  private static extractTechnicalTerms(text: string): string[] {
    return this.collectMatches(text, TECHNICAL_TERM_PATTERNS, 20, match =>
      match.length > 2 ? match : undefined
    );
  }

  /**
   * Find numbers, percentages, currency amounts, dates and similar figures
   * (at most 30 distinct entries).
   */
  private static extractNumbersAndFigures(text: string): string[] {
    return this.collectMatches(text, NUMBER_PATTERNS, 30, match => match);
  }

  /**
   * Find decision/action/finding style sentences (at most 15 distinct
   * entries). Only matches longer than 10 and shorter than 200 characters
   * are kept.
   */
  private static extractKeyPhrases(text: string): string[] {
    return this.collectMatches(text, KEY_PHRASE_PATTERNS, 15, match =>
      match.length > 10 && match.length < 200 ? match.trim() : undefined
    );
  }

  /**
   * Produce the plain-text rendition of the transcript.
   *
   * Without `preserveFormatting`, only blockquote and heading segments are
   * kept, joined by single spaces with all whitespace runs collapsed. With
   * it, every segment gets its own line, prefixed by "speaker: " when known.
   */
  private static generateRawText(segments: TranscriptSegment[], preserveFormatting: boolean): string {
    if (!preserveFormatting) {
      return segments
        .filter(s => s.type === "blockquote" || s.type.startsWith("heading"))
        .map(s => s.content)
        .join(" ")
        .replace(/\s+/g, " ")
        .trim();
    }

    return segments
      .map(s => (s.speaker ? `${s.speaker}: ${s.content}` : s.content))
      .join("\n")
      .trim();
  }

  /**
   * Render the segments in the requested output format.
   *
   * Unknown formats fall back to "structured". The options argument is kept
   * for signature stability; it is not read by any format at present.
   */
  private static generateFormattedTranscript(
    segments: TranscriptSegment[],
    format: TranscriptOptions["format"],
    _options: TranscriptOptions
  ): string {
    switch (format) {
      case "raw_text":
        return segments.map(s => s.content).join(" ");

      case "verbatim":
        // Only segments attributed to a speaker.
        return segments
          .filter(s => s.speaker)
          .map(s => `${s.speaker}: ${s.content}`)
          .join("\n");

      case "timestamps":
        // Segments without a start time are dropped entirely.
        return segments
          .filter(s => s.startTime)
          .map(s => {
            const time = s.startTime ? new Date(s.startTime).toLocaleTimeString() : "";
            const speaker = s.speaker ? `${s.speaker}: ` : "";
            return `[${time}] ${speaker}${s.content}`;
          })
          .join("\n");

      case "speakers_only":
        return segments
          .filter(s => s.speaker && s.type === "blockquote")
          .map(s => `**${s.speaker}:** ${s.content}`)
          .join("\n\n");

      case "structured":
      default: {
        let result = "";
        // Last speaker that received a label; consecutive segments by the
        // same speaker are appended without repeating the label.
        let currentSpeaker = "";

        for (const segment of segments) {
          if (segment.type.startsWith("heading")) {
            result += `\n# ${segment.content}\n\n`;
          } else if (segment.speaker && segment.speaker !== currentSpeaker) {
            currentSpeaker = segment.speaker;
            const timestamp = segment.startTime
              ? ` (${new Date(segment.startTime).toLocaleTimeString()})`
              : "";
            result += `\n**${segment.speaker}${timestamp}:**\n`;
            result += `${segment.content}\n`;
          } else if (segment.speaker) {
            result += `${segment.content}\n`;
          } else {
            // Unattributed, non-heading content is rendered in italics.
            result += `\n*${segment.content}*\n`;
          }
        }

        return result.trim();
      }
    }
  }

  /**
   * Build the placeholder transcript returned for lifelogs without contents.
   */
  private static createEmptyTranscript(lifelog: Lifelog): DetailedTranscript {
    return {
      lifelogId: lifelog.id,
      title: lifelog.title || "Empty Lifelog",
      startTime: lifelog.startTime,
      endTime: lifelog.endTime,
      totalDuration: new Date(lifelog.endTime).getTime() - new Date(lifelog.startTime).getTime(),
      segments: [],
      metadata: {
        speakerCount: 0,
        uniqueSpeakers: [],
        wordCount: 0,
        averageSegmentLength: 0,
        technicalTermsFound: [],
        numbersAndFigures: [],
        keyPhrases: [],
      },
      rawText: "",
      formattedTranscript: "No content available.",
    };
  }

  /**
   * Extract several lifelogs and combine them.
   *
   * @param lifelogs - Lifelogs to process, in output order.
   * @param options - Passed unchanged to `extractRawTranscript` for each one.
   * @returns The per-lifelog transcripts, their formatted text joined by a
   *   `---` separator, and totals / de-duplicated lists across all of them.
   */
  static extractMultipleTranscripts(
    lifelogs: Lifelog[],
    options: TranscriptOptions = { format: "structured" }
  ): {
    combinedTranscript: string;
    individualTranscripts: DetailedTranscript[];
    aggregatedMetadata: any;
  } {
    const individualTranscripts = lifelogs.map(lifelog =>
      this.extractRawTranscript(lifelog, options)
    );

    const combinedTranscript = individualTranscripts
      .map(t => t.formattedTranscript)
      .join("\n\n---\n\n");

    // De-duplicate a metadata list across all transcripts, first-seen order.
    const distinct = (select: (t: DetailedTranscript) => string[]): string[] =>
      Array.from(new Set(individualTranscripts.flatMap(select)));

    const aggregatedMetadata = {
      totalLifelogs: lifelogs.length,
      totalDuration: individualTranscripts.reduce((sum, t) => sum + t.totalDuration, 0),
      totalWordCount: individualTranscripts.reduce((sum, t) => sum + t.metadata.wordCount, 0),
      uniqueSpeakersAcrossAll: distinct(t => t.metadata.uniqueSpeakers),
      allTechnicalTerms: distinct(t => t.metadata.technicalTermsFound),
      allNumbersAndFigures: distinct(t => t.metadata.numbersAndFigures),
      allKeyPhrases: distinct(t => t.metadata.keyPhrases),
    };

    return {
      combinedTranscript,
      individualTranscripts,
      aggregatedMetadata,
    };
  }
}

Loading blob content...

Implementation Reference

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/199-mcp/mcp-limitless'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

transcript-extraction.ts•16.5 KiB