DeepSRT MCP Server

Official

Overview InspectNew Endpoints Schema Related Servers Reviews Score

deepsrt-mcp
src

import { describe, test, expect } from 'bun:test';
import axios from 'axios';

// Test with a real YouTube video that's likely to have captions
// Using a stable video with confirmed captions availability
const TEST_VIDEO_ID = 'mzDi8u3WMj0'; // DHH rant against Apple | Lex Fridman Podcast Clips
const TEST_VIDEO_URL = 'https://www.youtube.com/watch?v=mzDi8u3WMj0';

// Utility functions extracted from main implementation for testing
function extractVideoId(input: string): string {
  // Handle direct video ID
  if (input.length === 11 && !input.includes('/') && !input.includes('=')) {
    return input;
  }
  
  // Handle various YouTube URL formats
  const patterns = [
    /(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})/,
    /youtube\.com\/watch\?.*v=([a-zA-Z0-9_-]{11})/
  ];
  
  for (const pattern of patterns) {
    const match = input.match(pattern);
    if (match) {
      return match[1];
    }
  }
  
  throw new Error(`Invalid YouTube URL or video ID: ${input}`);
}

async function getVideoInfo(videoId: string) {
  const response = await axios.post('https://www.youtube.com/youtubei/v1/player', {
    context: {
      client: {
        clientName: 'ANDROID',
        clientVersion: '19.09.37',
        androidSdkVersion: 30,
      }
    },
    videoId: videoId
  });
  
  return response.data;
}

function selectBestCaption(captions: any[], preferredLang: string = 'en') {
  if (!captions || captions.length === 0) {
    return null;
  }
  
  // First, try to find manual captions in preferred language
  const manualPreferred = captions.find(c => 
    c.languageCode === preferredLang && !c.kind
  );
  if (manualPreferred) return manualPreferred;
  
  // Then try auto-generated in preferred language
  const autoPreferred = captions.find(c => 
    c.languageCode === preferredLang && c.kind === 'asr'
  );
  if (autoPreferred) return autoPreferred;
  
  // Fall back to any manual caption
  const anyManual = captions.find(c => !c.kind);
  if (anyManual) return anyManual;
  
  // Finally, any caption
  return captions[0];
}

function parseXMLTranscript(xmlContent: string): Array<{timestamp: string, text: string}> {
  const result: Array<{timestamp: string, text: string}> = [];
  
  // Handle YouTube's timedtext format with <p> tags
  if (xmlContent.includes('<timedtext')) {
    const pRegex = /<p[^>]*t="(\d+)"[^>]*>(.*?)<\/p>/g;
    let match;
    
    while ((match = pRegex.exec(xmlContent)) !== null) {
      const startMs = parseInt(match[1]);
      let textContent = match[2];
      
      // Remove <s> tags if present (word-level timing)
      textContent = textContent.replace(/<\/?s[^>]*>/g, '');
      
      // Decode HTML entities
      textContent = textContent
        .replace(/&amp;/g, '&')
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&quot;/g, '"')
        .replace(/&#39;/g, "'");
      
      // Clean up newlines and extra spaces
      textContent = textContent.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim();
      
      // Skip empty or music-only segments
      if (textContent && !textContent.match(/^\[.*\]$/) && textContent !== '♪♪♪') {
        const timestamp = formatTimestamp(startMs);
        result.push({ timestamp, text: textContent });
      }
    }
  } else {
    // Handle standard transcript format with <text> tags
    const textRegex = /<text[^>]*start="(\d+)"[^>]*>(.*?)<\/text>/g;
    let match;
    
    while ((match = textRegex.exec(xmlContent)) !== null) {
      const startMs = parseInt(match[1]);
      let textContent = match[2];
      
      // Remove <s> tags if present (word-level timing)
      textContent = textContent.replace(/<\/?s[^>]*>/g, '');
      
      // Decode HTML entities
      textContent = textContent
        .replace(/&amp;/g, '&')
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&quot;/g, '"')
        .replace(/&#39;/g, "'");
      
      const timestamp = formatTimestamp(startMs);
      result.push({ timestamp, text: textContent.trim() });
    }
  }
  
  return result;
}

function formatTimestamp(milliseconds: number): string {
  const totalSeconds = Math.floor(milliseconds / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `[${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}]`;
}

describe('Real YouTube Transcript Extraction', () => {
  test('extracts video ID from real YouTube URL', () => {
    const videoId = extractVideoId(TEST_VIDEO_URL);
    expect(videoId).toBe(TEST_VIDEO_ID);
    expect(videoId).toHaveLength(11);
  });

  test('fetches real video info from YouTube', async () => {
    const videoInfo = await getVideoInfo(TEST_VIDEO_ID);
    
    // Check if we got a valid response structure
    if (!videoInfo.videoDetails) {
      console.warn('⚠️  Video details not available - this may be due to API rate limiting or video restrictions');
      // Skip the test if video details are not available
      expect(videoInfo).toBeDefined(); // At least verify we got some response
      return;
    }
    
    // Verify we got video details
    expect(videoInfo.videoDetails).toBeDefined();
    expect(videoInfo.videoDetails.videoId).toBe(TEST_VIDEO_ID);
    expect(videoInfo.videoDetails.title).toBeDefined();
    expect(videoInfo.videoDetails.author).toBeDefined();
    
    console.log(`✅ Video found: "${videoInfo.videoDetails.title}" by ${videoInfo.videoDetails.author}`);
  }, 10000); // 10 second timeout for network request

  test('finds captions for real video', async () => {
    const videoInfo = await getVideoInfo(TEST_VIDEO_ID);
    
    // Check if captions exist
    const captions = videoInfo.captions?.playerCaptionsTracklistRenderer?.captionTracks;
    
    if (captions && captions.length > 0) {
      expect(captions).toBeArray();
      expect(captions.length).toBeGreaterThan(0);
      
      // Test caption selection
      const bestCaption = selectBestCaption(captions, 'en');
      expect(bestCaption).toBeDefined();
      expect(bestCaption.baseUrl).toBeDefined();
      expect(bestCaption.languageCode).toBeDefined();
      
      console.log(`✅ Found ${captions.length} caption track(s)`);
      console.log(`✅ Selected caption: ${bestCaption.name?.simpleText || bestCaption.languageCode} (${bestCaption.kind || 'manual'})`);
    } else {
      console.log('⚠️  No captions found for this video - this is expected for some videos');
      // This is not a failure - some videos don't have captions
      expect(true).toBe(true);
    }
  }, 10000);

  test('fetches and parses real transcript (if available)', async () => {
    const videoInfo = await getVideoInfo(TEST_VIDEO_ID);
    const captions = videoInfo.captions?.playerCaptionsTracklistRenderer?.captionTracks;
    
    if (captions && captions.length > 0) {
      const bestCaption = selectBestCaption(captions, 'en');
      
      if (bestCaption?.baseUrl) {
        try {
          // Fetch the actual transcript
          const transcriptResponse = await axios.get(bestCaption.baseUrl);
          expect(transcriptResponse.data).toBeDefined();
          expect(typeof transcriptResponse.data).toBe('string');
          expect(transcriptResponse.data).toContain('<timedtext'); // YouTube uses timedtext format
        
          // Parse the transcript
          const parsedTranscript = parseXMLTranscript(transcriptResponse.data);
          expect(parsedTranscript).toBeArray();
          
          if (parsedTranscript.length > 0) {
            expect(parsedTranscript[0]).toHaveProperty('timestamp');
            expect(parsedTranscript[0]).toHaveProperty('text');
            expect(parsedTranscript[0].timestamp).toMatch(/^\[\d{2}:\d{2}\]$/);
            expect(parsedTranscript[0].text.length).toBeGreaterThan(0);
            
            console.log(`✅ Parsed ${parsedTranscript.length} transcript segments`);
            console.log(`✅ First segment: ${parsedTranscript[0].timestamp} ${parsedTranscript[0].text}`);
            console.log(`✅ Sample segments:`);
            parsedTranscript.slice(0, 3).forEach(segment => {
              console.log(`   ${segment.timestamp} ${segment.text}`);
            });
            
            // Test that we have reasonable content
            expect(parsedTranscript.length).toBeGreaterThan(5); // Should have multiple segments
            expect(parsedTranscript.some(segment => segment.text.length > 10)).toBe(true); // Some segments should have substantial text
          }
        } catch (error: any) {
          if (error.response?.status === 429) {
            console.log('⚠️  Rate limited by YouTube API - this is expected during testing');
            expect(true).toBe(true); // Pass the test - rate limiting is not a failure
          } else {
            console.log(`⚠️  Transcript fetch failed: ${error.message}`);
            // Re-throw other errors as they might indicate real issues
            throw error;
          }
        }
      }
    } else {
      console.log('⚠️  Skipping transcript test - no captions available');
      expect(true).toBe(true);
    }
  }, 15000); // 15 second timeout for transcript fetch

  test('handles video ID extraction edge cases', () => {
    const testCases = [
      { input: 'dQw4w9WgXcQ', expected: 'dQw4w9WgXcQ' },
      { input: 'https://www.youtube.com/watch?v=dQw4w9WgXcQ', expected: 'dQw4w9WgXcQ' },
      { input: 'https://youtu.be/dQw4w9WgXcQ', expected: 'dQw4w9WgXcQ' },
      { input: 'https://www.youtube.com/embed/dQw4w9WgXcQ', expected: 'dQw4w9WgXcQ' },
      { input: 'https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=30s', expected: 'dQw4w9WgXcQ' },
    ];

    testCases.forEach(({ input, expected }) => {
      const result = extractVideoId(input);
      expect(result).toBe(expected);
    });
  });

  test('validates video ID format', () => {
    const validId = extractVideoId(TEST_VIDEO_ID);
    
    // YouTube video IDs are 11 characters long
    expect(validId).toHaveLength(11);
    
    // Should contain only valid characters (letters, numbers, hyphens, underscores)
    expect(validId).toMatch(/^[a-zA-Z0-9_-]{11}$/);
  });
});

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/DeepSRT/deepsrt-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server