YouTube Transcript MCP Server

youtube.ts•19.9 KiB

// @ts-ignore - no types available for youtube-transcript-api
import TranscriptAPI from 'youtube-transcript-api';
import { Innertube } from 'youtubei.js';
import { z } from 'zod';
import type {
  TranscriptResponse,
  TranscriptSegment,
  SearchResult,
  SearchOptions,
  ChannelVideo,
  PlaylistVideo,
  PlaylistInfo
} from '../types/index.js';

const VideoUrlSchema = z.string().refine(
  (url) => /^(https?:\/\/)?(www\.)?(youtube\.com\/watch\?v=|youtu\.be\/)[\w-]+/.test(url),
  "Invalid YouTube URL format"
);

const ChannelUrlSchema = z.string().refine(
  (url) => /^(https?:\/\/)?(www\.)?youtube\.com\/(channel\/|c\/|@|user\/)[\w-]+/.test(url),
  "Invalid YouTube channel URL format"
);

const PlaylistUrlSchema = z.string().refine(
  (url) => /^(https?:\/\/)?(www\.)?youtube\.com\/(playlist\?list=|watch\?v=[\w-]+&list=)[\w-]+/.test(url),
  "Invalid YouTube playlist URL format"
);

/** Language requested from the transcript API when the caller does not pass one. */
const DEFAULT_LANGUAGE = 'en';

/** Pause inserted between transcript batches to avoid rate limiting. */
const BATCH_DELAY_MS = 1000;

/** Upper bound on how many playlist entries are iterated just to count them. */
const MAX_COUNTED_PLAYLIST_VIDEOS = 200;

/**
 * Channel URL shapes, tried in order. The second tuple element is the prefix
 * that is put back in front of the captured identifier ('@' for handles).
 */
const CHANNEL_ID_PATTERNS: ReadonlyArray<readonly [RegExp, string]> = [
  [/\/channel\/([^/?#]+)/, ''],
  [/\/@([^/?#]+)/, '@'],
  [/\/c\/([^/?#]+)/, ''],
  [/\/user\/([^/?#]+)/, '']
];

type UnknownRecord = Record<string, unknown>;

/** Loosely-typed transcript item as handed back by the untyped transcript API. */
interface RawTranscriptItem {
  text: string;
  start?: string | number;
  duration?: string | number;
  dur?: string | number;
}

/** Plain-string view of the fields this service reads from a video node. */
interface VideoSummary {
  id: string;
  title: string;
  description: string;
  publishedAt: string;
  duration: string;
  viewCount: string;
  authorName: string;
  authorUrl: string;
  authorHandle: string;
}

/** Returns the message of an Error, or 'Unknown error' for any other thrown value. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : 'Unknown error';
}

/** Views any value as a string-keyed record; non-objects become an empty record. */
function asRecord(value: unknown): UnknownRecord {
  return typeof value === 'object' && value !== null ? (value as UnknownRecord) : {};
}

/** Walks `path` through nested objects/arrays, yielding undefined as soon as a step is missing. */
function dig(value: unknown, ...path: string[]): unknown {
  let current: unknown = value;
  for (const key of path) {
    if (typeof current !== 'object' || current === null) {
      return undefined;
    }
    current = (current as UnknownRecord)[key];
  }
  return current;
}

/**
 * Returns the first candidate that yields a non-empty string. A candidate may
 * be a string, a finite number, or an object carrying a string `text` field.
 * Falls back to '' so callers always get a string.
 */
function textOf(...candidates: unknown[]): string {
  for (const candidate of candidates) {
    if (typeof candidate === 'string') {
      if (candidate !== '') return candidate;
      continue;
    }
    if (typeof candidate === 'number') {
      if (Number.isFinite(candidate)) return String(candidate);
      continue;
    }
    const nested = asRecord(candidate).text;
    if (typeof nested === 'string' && nested !== '') {
      return nested;
    }
  }
  return '';
}

/** Parses a seconds value that may arrive as a number or numeric string; unparsable input becomes 0. */
function toSeconds(value: unknown): number {
  const parsed = typeof value === 'number' ? value : Number.parseFloat(String(value ?? ''));
  return Number.isFinite(parsed) ? parsed : 0;
}

/**
 * Parses a non-negative count from a number, a string such as "1,234 videos",
 * or an object with such a `text` field. Anything without digits becomes 0.
 */
function toCount(value: unknown): number {
  if (typeof value === 'number') {
    return Number.isFinite(value) && value > 0 ? Math.floor(value) : 0;
  }
  const digits = textOf(value).replace(/\D/g, '');
  return digits === '' ? 0 : Number.parseInt(digits, 10);
}

/** Inserts spaces at camelCase boundaries and replaces '_' / '-' with spaces. */
function spaceOutHandle(handleName: string): string {
  return handleName.replace(/([a-z])([A-Z])/g, '$1 $2').replace(/[_-]/g, ' ');
}

/** Builds the canonical watch URL for a video ID. */
function watchUrl(videoId: string): string {
  return `https://www.youtube.com/watch?v=${videoId}`;
}

/**
 * Reads the fields this service needs from a video node, tolerating both plain
 * strings and `{ text }` wrappers and the alternative property names the
 * different node kinds use (`video_id`/`id`, `length_text`/`duration`,
 * `author`/`channel`).
 */
function summarizeVideo(node: unknown): VideoSummary {
  const video = asRecord(node);
  return {
    id: textOf(video.video_id, video.id),
    title: textOf(video.title),
    description: textOf(video.description),
    publishedAt: textOf(video.published),
    duration: textOf(video.length_text, video.duration),
    viewCount: textOf(video.view_count),
    authorName: textOf(dig(video, 'author', 'name'), dig(video, 'channel', 'name')),
    authorUrl: textOf(dig(video, 'author', 'url')),
    authorHandle: textOf(dig(video, 'author', 'handle'))
  };
}

/**
 * Builds the predicate that decides whether a search hit belongs to the
 * channel being looked up. All handle-derived values are computed once here
 * instead of once per search result.
 *
 * A hit matches when its handle equals the requested one, or when its author
 * name loosely resembles the handle or the channel name found by the channel
 * search. Hits without an author name only match on an exact handle: an empty
 * name would otherwise satisfy every substring test.
 */
function buildChannelMatcher(
  channelId: string,
  handleName: string,
  actualChannelName: string
): (authorName: string, channelHandle: string) => boolean {
  const handleLower = handleName.toLowerCase();
  const spacedHandle = spaceOutHandle(handleName).toLowerCase();
  const spacedWords = spacedHandle.split(' ');
  const actualLower = actualChannelName.toLowerCase();

  return (authorName, channelHandle) => {
    if (channelHandle === channelId || channelHandle === `@${handleName}`) {
      return true;
    }

    const authorLower = authorName.trim().toLowerCase();
    if (authorLower === '') {
      return false;
    }

    const firstWord = authorLower.split(' ')[0] ?? '';
    return (
      authorLower === spacedHandle ||
      authorLower.includes(handleLower) ||
      // Handle contains the author's first word (e.g. a first name).
      (firstWord !== '' && handleLower.includes(firstWord)) ||
      // Every word of the spaced-out handle appears in the author name.
      spacedWords.every((word) => word.length > 1 && authorLower.includes(word)) ||
      // Author name contains the channel name found by the channel search.
      (actualLower !== '' && authorLower.includes(actualLower))
    );
  };
}

/**
 * Fetches YouTube transcripts and lists channel / playlist videos.
 *
 * Transcripts come from `youtube-transcript-api`; channel, playlist and search
 * data come from a lazily created `youtubei.js` client.
 */
export class YouTubeService {
  private yt: Innertube | null = null;

  /** Creates the Innertube client on first use and reuses it afterwards. */
  private async getYouTubeClient(): Promise<Innertube> {
    if (!this.yt) {
      this.yt = await Innertube.create();
    }
    return this.yt;
  }

  /**
   * Extracts the video ID from a `youtube.com/watch?v=` or `youtu.be/` URL.
   * @throws Error when the URL contains neither form.
   */
  private extractVideoId(url: string): string {
    const match = url.match(/(?:youtube\.com\/watch\?v=|youtu\.be\/)([^&\n?#]+)/);
    if (!match || !match[1]) {
      throw new Error('Could not extract video ID from URL');
    }
    return match[1];
  }

  /**
   * Fetches the transcript of a single video.
   *
   * @param url - Video URL; validated against the video URL schema before any request.
   * @param language - Transcript language code; defaults to 'en'.
   * @returns The transcript with per-segment start/duration/end in seconds.
   *   `title` is always '' because the transcript API does not provide one.
   * @throws ZodError when the URL is malformed; Error (prefixed
   *   "Failed to fetch transcript:") for any failure while fetching.
   */
  async getTranscript(url: string, language?: string): Promise<TranscriptResponse> {
    VideoUrlSchema.parse(url);
    const resolvedLanguage = language || DEFAULT_LANGUAGE;

    try {
      const videoId = this.extractVideoId(url);

      // Use the v2.0.4 static API method
      const rawItems: unknown = await TranscriptAPI.getTranscript(videoId, resolvedLanguage);
      if (!Array.isArray(rawItems)) {
        throw new Error('Transcript API returned an unexpected response');
      }

      const segments: TranscriptSegment[] = rawItems.map((item: RawTranscriptItem) => {
        const start = toSeconds(item.start);
        const duration = toSeconds(item.duration || item.dur);
        return { text: item.text, start, duration, end: start + duration };
      });

      // reduce instead of Math.max(...array): spreading a very long transcript
      // into call arguments can exceed the engine's argument limit.
      const totalDuration = segments.reduce((latest, segment) => Math.max(latest, segment.end), 0);

      return {
        videoId,
        title: '', // v2.0.4 doesn't provide title
        language: resolvedLanguage,
        segments,
        totalDuration
      };
    } catch (error) {
      throw new Error(`Failed to fetch transcript: ${describeError(error)}`);
    }
  }

  /**
   * Finds every segment whose text contains `options.query`.
   *
   * Each hit carries up to two segments before and two after as context.
   * `options.contextWindow` is not consulted by this implementation.
   * An empty query yields no results rather than matching every segment.
   *
   * @param transcript - Transcript to search.
   * @param options - `query` plus optional `caseSensitive` (default false).
   */
  async searchTranscript(
    transcript: TranscriptResponse,
    options: SearchOptions
  ): Promise<SearchResult[]> {
    const { query, caseSensitive = false } = options;
    if (!query) {
      return [];
    }

    const needle = caseSensitive ? query : query.toLowerCase();
    const { segments } = transcript;
    const results: SearchResult[] = [];

    segments.forEach((segment, index) => {
      const haystack = caseSensitive ? segment.text : segment.text.toLowerCase();
      if (!haystack.includes(needle)) {
        return;
      }

      const contextStart = Math.max(0, index - 2);
      const contextEnd = Math.min(segments.length, index + 3);
      results.push({
        segment,
        context: {
          before: segments.slice(contextStart, index),
          after: segments.slice(index + 1, contextEnd)
        },
        matchIndex: index
      });
    });

    return results;
  }

  /**
   * Fetches transcripts for many videos, `maxConcurrent` at a time, pausing
   * between batches. Videos whose transcript cannot be fetched are logged via
   * console.error and omitted from the result; the call itself does not fail.
   *
   * @param urls - Video URLs.
   * @param maxConcurrent - Batch size; values below 1 (or NaN) are treated as 1
   *   so the batching loop always advances.
   * @returns Transcripts of the videos that succeeded, in input order.
   */
  async batchTranscripts(urls: string[], maxConcurrent = 3): Promise<TranscriptResponse[]> {
    const batchSize = maxConcurrent >= 1 ? Math.floor(maxConcurrent) : 1;
    const results: TranscriptResponse[] = [];
    const errors: Array<{ url: string; error: string }> = [];

    for (let offset = 0; offset < urls.length; offset += batchSize) {
      const batch = urls.slice(offset, offset + batchSize);

      const outcomes = await Promise.all(
        batch.map(async (url): Promise<TranscriptResponse | undefined> => {
          try {
            return await this.getTranscript(url);
          } catch (error) {
            errors.push({ url, error: describeError(error) });
            return undefined;
          }
        })
      );

      for (const transcript of outcomes) {
        if (transcript) {
          results.push(transcript);
        }
      }

      // Add a small delay between batches to avoid rate limiting
      if (offset + batchSize < urls.length) {
        await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY_MS));
      }
    }

    // Log errors but don't fail the entire operation
    if (errors.length > 0) {
      console.error(`Failed to get transcripts for ${errors.length} videos:`, errors);
    }

    return results;
  }

  /** Joins all segment texts with single spaces. */
  formatAsText(transcript: TranscriptResponse): string {
    return transcript.segments.map((segment) => segment.text).join(' ');
  }

  /** Renders the transcript as SRT cues: 1-based index, `start --> end`, text, blank line. */
  formatAsSRT(transcript: TranscriptResponse): string {
    return transcript.segments
      .map((segment, index) => {
        const startTime = this.formatTime(segment.start);
        const endTime = this.formatTime(segment.end);
        return `${index + 1}\n${startTime} --> ${endTime}\n${segment.text}\n`;
      })
      .join('\n');
  }

  /**
   * Formats seconds as an SRT timestamp `HH:MM:SS,mmm`.
   *
   * The value is rounded once to whole milliseconds and then split with
   * integer arithmetic; taking `seconds % 1` and flooring would lose a
   * millisecond to floating-point error (1.001 would print ",000").
   * Negative or non-finite input is rendered as 00:00:00,000.
   */
  private formatTime(seconds: number): string {
    const totalMs = Number.isFinite(seconds) && seconds > 0 ? Math.round(seconds * 1000) : 0;
    const totalSeconds = Math.floor(totalMs / 1000);

    const hours = Math.floor(totalSeconds / 3600);
    const minutes = Math.floor(totalSeconds / 60) % 60;
    const secs = totalSeconds % 60;
    const ms = totalMs % 1000;

    const pad = (value: number, width: number): string => String(value).padStart(width, '0');
    return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(secs, 2)},${pad(ms, 3)}`;
  }

  /**
   * Lists videos of a channel.
   *
   * Handle URLs (`/@name`) are first resolved directly and fall back to a
   * search-based lookup when that fails; all other URL forms use direct lookup.
   *
   * @param channelUrl - Channel URL; validated against the channel URL schema.
   * @param maxVideos - Maximum number of videos to return.
   * @throws ZodError when the URL is malformed; Error (prefixed
   *   "Failed to fetch channel videos:") for any failure while fetching.
   */
  async getChannelVideos(channelUrl: string, maxVideos = 50): Promise<ChannelVideo[]> {
    ChannelUrlSchema.parse(channelUrl);

    try {
      // Extract channel identifier from URL
      const channelId = this.extractChannelId(channelUrl);

      // For handle-based URLs, try direct access first, then fall back to search
      if (channelId.startsWith('@')) {
        try {
          return await this.getChannelVideosByChannelId(channelUrl, channelId, maxVideos);
        } catch (directError) {
          console.error(`Direct channel access failed for ${channelId}, falling back to search:`, directError);
          return await this.getChannelVideosBySearch(channelUrl, channelId, maxVideos);
        }
      }

      // For channel IDs, use traditional approach
      return await this.getChannelVideosByChannelId(channelUrl, channelId, maxVideos);
    } catch (error) {
      throw new Error(`Failed to fetch channel videos: ${describeError(error)}`);
    }
  }

  /**
   * Finds a handle's videos through video search when direct channel lookup
   * failed. Runs several query variations, keeps only results whose author
   * matches the handle (see buildChannelMatcher), and de-duplicates by video ID.
   *
   * @param channelUrl - Original channel URL, copied into each result.
   * @param channelId - Handle including the leading '@'.
   * @param maxVideos - Stop collecting (and stop issuing queries) once reached.
   * @throws Error when no matching video is found.
   */
  private async getChannelVideosBySearch(channelUrl: string, channelId: string, maxVideos: number): Promise<ChannelVideo[]> {
    const yt = await this.getYouTubeClient();
    const handleName = channelId.substring(1); // Remove @

    // First, try to find the actual channel name from a channel search
    let actualChannelName = '';
    try {
      const channelSearch = await yt.search(handleName, { type: 'channel' });
      const firstChannel: unknown = channelSearch.results?.[0];
      actualChannelName = textOf(dig(firstChannel, 'title'), dig(firstChannel, 'name'));
    } catch {
      // Best effort only: continue with handle-based queries.
    }

    // Most specific queries first; de-duplicated so no query is sent twice.
    const orderedQueries: string[] = [];
    if (actualChannelName && actualChannelName !== handleName) {
      orderedQueries.push(actualChannelName, `"${actualChannelName}"`);
    }
    orderedQueries.push(
      ...this.getChannelNameVariations(handleName),
      handleName,
      `"${handleName}"`,
      `${handleName} channel`
    );
    const searchQueries = Array.from(new Set(orderedQueries));

    const isTargetChannel = buildChannelMatcher(channelId, handleName, actualChannelName);
    const collected: ChannelVideo[] = [];
    const seenVideoIds = new Set<string>();

    for (const query of searchQueries) {
      // Enough videos already: do not spend further search requests.
      if (collected.length >= maxVideos) break;

      try {
        const searchResults = await yt.search(query, { type: 'video', sort_by: 'upload_date' });

        for (const node of searchResults.results ?? []) {
          if (collected.length >= maxVideos) break;

          const video = summarizeVideo(node);
          if (!video.id || seenVideoIds.has(video.id)) continue;
          if (!isTargetChannel(video.authorName, video.authorHandle)) continue;

          seenVideoIds.add(video.id);
          collected.push({
            id: video.id,
            title: video.title,
            url: watchUrl(video.id),
            description: video.description,
            publishedAt: video.publishedAt,
            duration: video.duration,
            viewCount: video.viewCount,
            channelTitle: video.authorName,
            channelUrl: channelUrl
          });
        }
      } catch {
        // A failing query is skipped; the remaining queries may still succeed.
      }
    }

    if (collected.length > 0) {
      return collected;
    }

    throw new Error(`Could not find videos for channel handle: ${channelId}`);
  }

  /**
   * Produces search-query variations of a handle: the handle itself, a
   * spaced-out form (camelCase / '_' / '-' split), and a Title Case form,
   * each also wrapped in double quotes. Later forms are only added when they
   * differ from the previous one.
   */
  private getChannelNameVariations(handleName: string): string[] {
    const withQuoted = (name: string): string[] => [name, `"${name}"`];
    const variations = withQuoted(handleName);

    const spacedName = spaceOutHandle(handleName);
    if (spacedName !== handleName) {
      variations.push(...withQuoted(spacedName));
    }

    const capitalizedName = spacedName
      .split(' ')
      .map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
      .join(' ');
    if (capitalizedName !== spacedName) {
      variations.push(...withQuoted(capitalizedName));
    }

    return variations;
  }

  /**
   * Looks a channel up by identifier, retrying with alternative spellings
   * ('@' prefix, 'UC' prefix added or removed) when the first attempt fails.
   * Spellings identical to the one that already failed are not retried.
   *
   * @throws The error of the first attempt when every spelling fails.
   */
  private async resolveChannel(yt: Innertube, channelId: string) {
    try {
      return await yt.getChannel(channelId);
    } catch (primaryError) {
      const alternatives = [`UC${channelId}`, channelId.replace(/^UC/, '')];
      if (!channelId.startsWith('@')) {
        alternatives.unshift(`@${channelId}`);
      }
      const candidates = Array.from(new Set(alternatives)).filter(
        (candidate) => candidate !== '' && candidate !== channelId
      );

      for (const candidate of candidates) {
        try {
          return await yt.getChannel(candidate);
        } catch {
          // Try the next spelling.
        }
      }
      throw primaryError;
    }
  }

  /**
   * Lists videos exposed by the channel object returned from direct lookup.
   * At most `maxVideos` entries are examined; entries without an ID are skipped.
   *
   * @param channelUrl - Original channel URL, copied into each result.
   * @param channelId - Channel identifier or '@handle'.
   * @param maxVideos - Maximum number of entries to examine.
   */
  private async getChannelVideosByChannelId(channelUrl: string, channelId: string, maxVideos: number): Promise<ChannelVideo[]> {
    const yt = await this.getYouTubeClient();
    const channel = await this.resolveChannel(yt, channelId);

    const channelTitle = textOf(dig(channel, 'header', 'title'), dig(channel, 'metadata', 'title'));
    const channelVideos: ChannelVideo[] = [];
    let examined = 0;

    for (const node of channel.videos) {
      if (examined >= maxVideos) break;
      examined++;

      const video = summarizeVideo(node);
      if (!video.id) continue;

      channelVideos.push({
        id: video.id,
        title: video.title,
        url: watchUrl(video.id),
        description: video.description,
        publishedAt: video.publishedAt,
        duration: video.duration,
        viewCount: video.viewCount,
        channelTitle,
        channelUrl: channelUrl
      });
    }

    return channelVideos;
  }

  /** Returns only the watch URLs of a channel's videos. See getChannelVideos. */
  async getChannelVideoUrls(channelUrl: string, maxVideos = 50): Promise<string[]> {
    const videos = await this.getChannelVideos(channelUrl, maxVideos);
    return videos.map((video) => video.url);
  }

  /** Lists a channel's videos and fetches their transcripts in batches. See batchTranscripts. */
  async getChannelTranscripts(channelUrl: string, maxVideos = 50, maxConcurrent = 3): Promise<TranscriptResponse[]> {
    const videoUrls = await this.getChannelVideoUrls(channelUrl, maxVideos);
    return this.batchTranscripts(videoUrls, maxConcurrent);
  }

  /**
   * Extracts the channel identifier from a `/channel/`, `/@`, `/c/` or
   * `/user/` URL. Handles keep their leading '@'.
   * @throws Error when no identifier can be extracted (never returns '').
   */
  private extractChannelId(url: string): string {
    for (const [pattern, prefix] of CHANNEL_ID_PATTERNS) {
      const identifier = url.match(pattern)?.[1];
      if (identifier) {
        return `${prefix}${identifier}`;
      }
    }
    throw new Error('Could not extract channel identifier from URL');
  }

  /**
   * Extracts the `list=` query parameter from a playlist URL. The value stops
   * at '&' or '#', so a trailing URL fragment is not part of the ID.
   * @throws Error when the URL has no `list` parameter.
   */
  private extractPlaylistId(url: string): string {
    const match = url.match(/[?&]list=([^&#]+)/);
    if (!match || !match[1]) {
      throw new Error('Could not extract playlist ID from URL');
    }
    return match[1];
  }

  /**
   * Fetches title, description, owner and video count of a playlist.
   *
   * The count is obtained by iterating the playlist's videos, capped at 200;
   * if iteration fails, numeric metadata fields are used instead.
   *
   * @param playlistUrl - Playlist URL; validated against the playlist URL schema.
   * @throws ZodError when the URL is malformed; Error (prefixed
   *   "Failed to fetch playlist info:") for any failure while fetching.
   */
  async getPlaylistInfo(playlistUrl: string): Promise<PlaylistInfo> {
    PlaylistUrlSchema.parse(playlistUrl);

    try {
      const yt = await this.getYouTubeClient();
      const playlistId = this.extractPlaylistId(playlistUrl);
      const playlist = await yt.getPlaylist(playlistId);
      const loose = asRecord(playlist);

      let videoCount = 0;
      try {
        for (const _video of playlist.videos) {
          videoCount++;
          // Count up to the cap only, to avoid long iteration
          if (videoCount >= MAX_COUNTED_PLAYLIST_VIDEOS) break;
        }
      } catch {
        // If we can't count videos, try to get the number from metadata.
        videoCount =
          toCount(loose.video_count) ||
          toCount(loose.estimatedVideoCount) ||
          toCount(dig(loose, 'sidebar', 'stats', 'runs', '0'));
      }

      const metadata = asRecord(loose.metadata || loose.header || playlist);
      const sidebar = asRecord(loose.sidebar);
      const owner = asRecord(sidebar.owner || sidebar.videoOwner);

      return {
        id: playlistId,
        title: textOf(metadata.title),
        description: textOf(metadata.description),
        channelTitle: textOf(
          owner.name,
          owner.title,
          dig(metadata, 'author', 'name'),
          metadata.channelTitle
        ),
        channelUrl: textOf(
          owner.url,
          dig(owner, 'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
          dig(metadata, 'author', 'url')
        ),
        videoCount,
        url: playlistUrl
      };
    } catch (error) {
      throw new Error(`Failed to fetch playlist info: ${describeError(error)}`);
    }
  }

  /**
   * Lists the videos of a playlist.
   *
   * At most `maxVideos` entries are examined; entries without an ID are
   * skipped. `playlistPosition` is 1-based and counts returned videos only.
   *
   * @param playlistUrl - Playlist URL; validated against the playlist URL schema.
   * @param maxVideos - Maximum number of entries to examine.
   * @throws ZodError when the URL is malformed; Error (prefixed
   *   "Failed to fetch playlist videos:") for any failure while fetching.
   */
  async getPlaylistVideos(playlistUrl: string, maxVideos = 50): Promise<PlaylistVideo[]> {
    PlaylistUrlSchema.parse(playlistUrl);

    try {
      const yt = await this.getYouTubeClient();
      const playlistId = this.extractPlaylistId(playlistUrl);
      const playlist = await yt.getPlaylist(playlistId);

      const playlistVideos: PlaylistVideo[] = [];
      let examined = 0;

      for (const node of playlist.videos) {
        if (examined >= maxVideos) break;
        examined++;

        const video = summarizeVideo(node);
        if (!video.id) continue;

        playlistVideos.push({
          id: video.id,
          title: video.title,
          url: watchUrl(video.id),
          description: video.description,
          publishedAt: video.publishedAt,
          duration: video.duration,
          viewCount: video.viewCount,
          channelTitle: video.authorName,
          channelUrl: video.authorUrl,
          playlistPosition: playlistVideos.length + 1
        });
      }

      return playlistVideos;
    } catch (error) {
      throw new Error(`Failed to fetch playlist videos: ${describeError(error)}`);
    }
  }

  /** Returns only the watch URLs of a playlist's videos. See getPlaylistVideos. */
  async getPlaylistVideoUrls(playlistUrl: string, maxVideos = 50): Promise<string[]> {
    const videos = await this.getPlaylistVideos(playlistUrl, maxVideos);
    return videos.map((video) => video.url);
  }

  /** Lists a playlist's videos and fetches their transcripts in batches. See batchTranscripts. */
  async getPlaylistTranscripts(playlistUrl: string, maxVideos = 50, maxConcurrent = 3): Promise<TranscriptResponse[]> {
    const videoUrls = await this.getPlaylistVideoUrls(playlistUrl, maxVideos);
    return this.batchTranscripts(videoUrls, maxConcurrent);
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/emit-ia/youtube-transcript-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

youtube.ts•19.9 KiB