YouTube Transcript MCP Server

url-normalize.ts•8.89 KiB

import Url from 'url-parse';

/**
 * Hostnames accepted as YouTube hosts.
 *
 * Entries are compared against the parsed hostname *after* a leading `www.`
 * has been stripped, so no entry here carries a `www.` prefix (a
 * `www.youtube.com` entry could only ever match `www.www.youtube.com`).
 */
const VALID_YOUTUBE_HOSTNAMES: readonly string[] = [
  'youtube.com',
  'm.youtube.com',
  'youtu.be',
  // Common international domains (this list can be expanded)
  'youtube.co.uk',
  'youtube.de',
  'youtube.fr',
  'youtube.jp',
  'youtube.ca',
  'youtube.es',
  'youtube.br',
  'youtube.com.br',
  'youtube.co.in',
  'youtube.co.kr',
];

/** Path prefixes whose following path segment is the video ID (e.g. `/shorts/VIDEO_ID`). */
const ID_IN_PATH_PREFIXES: readonly string[] = ['/live/', '/embed/', '/shorts/'];

/**
 * Checks if the given URL is a valid YouTube video URL.
 * Handles various YouTube domain formats (youtube.com, youtu.be, m.youtube.com, international domains)
 * and path formats (/watch, /live, /embed, /shorts).
 * @param url The URL to validate.
 * @returns True if the hostname is a known YouTube host and a video ID can be extracted,
 * false otherwise (including when the URL is empty or cannot be parsed).
 */
export function isValidYouTubeUrl(url: string): boolean {
  if (!url) {
    return false;
  }

  try {
    const parsedUrl = new Url(url, true) as Url<any>; // true to parse query string

    // Remove 'www.' for simpler hostname matching
    const hostname = parsedUrl.hostname.startsWith('www.')
      ? parsedUrl.hostname.substring(4)
      : parsedUrl.hostname;

    if (!VALID_YOUTUBE_HOSTNAMES.includes(hostname)) {
      return false;
    }

    // If we can extract a video ID, consider it valid for our purposes
    return !!extractVideoIdFromParsedUrl(parsedUrl);
  } catch {
    // Mirror extractVideoId/cleanTrackingParams: a parser failure means "not valid",
    // it should never escape from a boolean validator.
    return false;
  }
}

/**
 * Extracts the YouTube video ID from a pre-parsed URL object.
 * This is an internal helper function. It does not validate the hostname
 * (apart from recognising `youtu.be` short links).
 * @param parsedUrl The parsed URL object from url-parse (query string parsed into an object).
 * @returns The YouTube video ID, or null if not found.
 */
function extractVideoIdFromParsedUrl(parsedUrl: Url<any>): string | null {
  const pathname = parsedUrl.pathname;
  const query = parsedUrl.query; // Parsed query object
  // Tolerate a repeated `v` parameter surfacing as an array: take the first value.
  const queryVideoId = Array.isArray(query.v) ? query.v[0] : query.v;

  if (parsedUrl.hostname === 'youtu.be') {
    // For youtu.be URLs, the ID is the first part of the path
    return pathname.split('/')[1] || null;
  }

  if (pathname.startsWith('/watch') && query.v) {
    return queryVideoId;
  }

  // /live/VIDEO_ID, /embed/VIDEO_ID, /shorts/VIDEO_ID: the ID is the second path segment.
  if (ID_IN_PATH_PREFIXES.some((prefix) => pathname.startsWith(prefix))) {
    return pathname.split('/')[2] || null;
  }

  // Check for video ID in query parameters for root paths on m.youtube.com etc.
  // e.g. https://m.youtube.com/?v=VIDEO_ID (less common but possible)
  if (query.v && (pathname === '/' || pathname === '')) {
    return queryVideoId;
  }

  return null;
}

/**
 * Extracts the clean YouTube video ID from a URL.
 * Note: the hostname is not checked here; use isValidYouTubeUrl for that.
 * @param url The YouTube URL.
 * @returns The video ID, or null if the URL is empty, cannot be parsed, or no ID is found.
 */
export function extractVideoId(url: string): string | null {
  if (!url) {
    return null;
  }

  try {
    const parsedUrl = new Url(url, true) as Url<any>;
    return extractVideoIdFromParsedUrl(parsedUrl);
  } catch {
    // url-parse might throw on severely malformed URLs
    return null;
  }
}

/**
 * Removes tracking parameters and normalizes the query string for a YouTube URL.
 * This function primarily aims to get the base URL with only the video ID.
 * For non-video URLs or URLs where a video ID isn't primary, its behavior might be simple.
 * @param url The YouTube URL string.
 * @returns A cleaner URL string, typically with only the video ID parameter if applicable.
 * The input is returned unchanged when it is empty or parsing fails.
 */
export function cleanTrackingParams(url: string): string {
  if (!url) {
    return url; // Return original if empty or null
  }

  try {
    const parsedUrl = new Url(url, true) as Url<any>;
    const videoId = extractVideoIdFromParsedUrl(parsedUrl);

    if (videoId) {
      // If it's a known video URL structure, normalize to the standard watch?v= format.
      // This inherently cleans other params for these structures.
      const idLivesInPath =
        parsedUrl.hostname === 'youtu.be' ||
        ID_IN_PATH_PREFIXES.some((prefix) => parsedUrl.pathname.startsWith(prefix));
      if (idLivesInPath) {
        return `https://www.youtube.com/watch?v=${videoId}`;
      }

      // For /watch URLs, rebuild with only 'v', keeping the original protocol.
      if (parsedUrl.pathname.startsWith('/watch')) {
        // url-parse protocols include the trailing colon ("https:"), so the fallback
        // must too; a bare 'https' would produce "https//www.youtube.com/...".
        const protocol = parsedUrl.protocol || 'https:';
        return `${protocol}//www.youtube.com/watch?v=${videoId}`;
      }
    }

    // Fallback for other URLs or if videoId couldn't be cleanly extracted
    // by the logic above, but we still want to try cleaning.
    // Rebuild the URL with only essential parameters (if any).
    // For YouTube, 'v' is the primary one we care about for video pages.
    // This part might be too aggressive or not aggressive enough depending on
    // the types of "other" YouTube URLs one might encounter.
    // Given the project's focus on video transcripts, this is a reasonable default.
    let newQuery: Record<string, any> = {};
    if (parsedUrl.query && parsedUrl.query.v) {
      newQuery = { v: parsedUrl.query.v };
    }
    // Potentially add other "essential" params if needed in the future.
    parsedUrl.set('query', newQuery);

    // Ensure standard hostname for consistency if it was an m.youtube.com or other variant
    if (parsedUrl.hostname && parsedUrl.hostname.includes('youtube.')) {
      parsedUrl.set('hostname', 'www.youtube.com');
    }

    // Ensure https
    parsedUrl.set('protocol', 'https');

    return parsedUrl.toString();
  } catch {
    // If parsing fails, return the original URL
    return url;
  }
}

/**
 * Normalizes a YouTube URL to the format: `https://www.youtube.com/watch?v=VIDEO_ID`.
 * @param url The YouTube URL to normalize.
 * @returns The normalized URL, or the original URL (after logging a warning) if no video ID
 * can be extracted from it.
 * Consider throwing an error for truly invalid URLs if stricter handling is needed.
 */
export function normalizeYouTubeUrl(url: string): string {
  const videoId = extractVideoId(url);
  if (videoId) {
    return `https://www.youtube.com/watch?v=${videoId}`;
  }

  // As per instructions: "Return clear error messages for invalid URLs"
  // Throwing an error is a way to provide a clear message.
  // Alternatively, could return a specific string like "invalid_youtube_url"
  // or follow Postel's law and return the original URL if unnormalizable.
  // The requirement "Always normalize to: https://www.youtube.com/watch?v=VIDEO_ID"
  // implies that if it can't, it's an issue.
  // For now, returning original URL if video ID not found.
  // This can be made stricter by throwing an error.
  console.warn(`Could not normalize URL, video ID not found: ${url}`);
  return url; // Or throw new Error(`Invalid or non-video YouTube URL: ${url}`);
}

// Example Usage (can be removed or kept for testing)
/*
const urlsToTest = [
  'http://www.youtube.com/watch?v=VIDEO_ID&feature=feedrec_grec_index',
  'http://www.youtube.com/user/USERNAME#p/a/u/1/VIDEO_ID',
  'http://www.youtube.com/v/VIDEO_ID?fs=1&hl=en_US&rel=0',
  'http://www.youtube.com/watch?v=VIDEO_ID#t=0m10s',
  'http://www.youtube.com/embed/VIDEO_ID?rel=0',
  'http://www.youtube.com/live/VIDEO_ID?si=TRACKING_PARAM',
  'https://www.youtube.com/watch?v=VIDEO_ID&t=123s&si=TRACKING_PARAM',
  'https://youtu.be/VIDEO_ID?si=TRACKING_PARAM',
  'https://m.youtube.com/watch?v=VIDEO_ID',
  'https://youtube.com/watch?v=VIDEO_ID',
  'youtube.com/shorts/VIDEO_ID',
  'https://www.youtube.com/playlist?list=PLAYLIST_ID&v=VIDEO_ID_IN_PLAYLIST', // Should extract VIDEO_ID_IN_PLAYLIST
  'https://youtube.co.uk/watch?v=VIDEO_ID',
  'https://youtube.de/watch?v=VIDEO_ID',
  'https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PL মাসুদ_অবুঝ_মন&index=1&ab_channel=RickAstley', // complex list param
  'https://www.youtube.com/watch?app=desktop&v=VIDEO_ID',
  'https://m.youtube.com/watch?app=desktop&v=VIDEO_ID'
];

urlsToTest.forEach(testUrl => {
  console.log(`Original: ${testUrl}`);
  console.log(`Valid?: ${isValidYouTubeUrl(testUrl)}`);
  const videoId = extractVideoId(testUrl);
  console.log(`Video ID: ${videoId}`);
  if (videoId) {
    console.log(`Normalized: ${normalizeYouTubeUrl(testUrl)}`);
    console.log(`Cleaned: ${cleanTrackingParams(testUrl)}`);
  }
  console.log('---');
});
*/

// Handling edge cases from requirements:
// - Validate YouTube URL format (covered by isValidYouTubeUrl, extractVideoId implicitly)
// - Return clear error messages for invalid URLs (normalizeYouTubeUrl logs a warning, can be changed to throw error)
// - Handle edge cases (private videos, age-restricted content):
//   These are more about content accessibility than URL structure.
//   The normalization will still produce a valid URL structure for them.
//   The actual fetching in `youtube.ts` (Phase 3) would encounter errors for these.
//   The `isValidYouTubeUrl` checks the *format*, not content availability.

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/RahulPatkiWork/youtube-transcript-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

url-normalize.ts•8.89 KiB