// youtube-service.ts
import { google, youtube_v3 } from 'googleapis';
import dotenv from 'dotenv';
import { getSubtitles } from 'youtube-captions-scraper';
import NodeCache from 'node-cache';
import { TranscriptSegment, TranscriptOptions, FormattedTranscript, TranscriptError, TimeRange, SearchOptions } from './types/youtube-types.js';
// Load environment variables (e.g. YOUTUBE_API_KEY) from a local .env file.
dotenv.config();
const TRANSCRIPT_CACHE_TTL = 3600; // Cache transcripts for 1 hour
/**
 * Wrapper around the YouTube Data API v3 plus a caption scraper, providing
 * search, metadata, comment, and transcript retrieval with in-memory
 * transcript caching and several transcript post-processing formats.
 */
export class YouTubeService {
  /** Authenticated YouTube Data API v3 client (public for advanced callers). */
  public youtube: youtube_v3.Youtube;
  /** In-memory cache of raw caption tracks, keyed by video id + language. */
  private transcriptCache: NodeCache;
  private apiKey: string;

  /**
   * @param apiKey Optional API key; falls back to the YOUTUBE_API_KEY
   *               environment variable, then to the empty string.
   */
  constructor(apiKey?: string) {
    this.apiKey = apiKey || process.env.YOUTUBE_API_KEY || '';

    if (!this.apiKey) {
      // Non-fatal: transcript scraping does not need an API key.
      console.error('Warning: YouTube API key not provided');
      console.error('YouTube API functionality will be limited');
    }
    this.youtube = google.youtube({
      version: 'v3',
      auth: this.apiKey
    });
    this.transcriptCache = new NodeCache({ stdTTL: TRANSCRIPT_CACHE_TTL });
  }

  /**
   * Searches YouTube for resources matching a free-text query.
   *
   * @param query      Search query string.
   * @param maxResults Maximum number of results (default 10).
   * @param options    Optional search.list filters, passed through verbatim.
   * @returns The raw search.list API response.
   * @throws Re-throws any YouTube API error after logging it.
   */
  async searchVideos(
    query: string,
    maxResults: number = 10,
    options: {
      channelId?: string;
      order?: string;
      type?: string;
      videoDuration?: string;
      publishedAfter?: string;
      publishedBefore?: string;
      videoCaption?: string;
      videoDefinition?: string;
      regionCode?: string;
    } = {}
  ): Promise<youtube_v3.Schema$SearchListResponse> {
    try {
      const response = await this.youtube.search.list({
        part: ['snippet'],
        q: query,
        maxResults,
        // Default to video results when no explicit type is requested.
        type: options.type ? [options.type] : ['video'],
        channelId: options.channelId,
        order: options.order,
        videoDuration: options.videoDuration,
        publishedAfter: options.publishedAfter,
        publishedBefore: options.publishedBefore,
        videoCaption: options.videoCaption,
        videoDefinition: options.videoDefinition,
        regionCode: options.regionCode
      });
      return response.data;
    } catch (error) {
      console.error('Error searching videos:', error);
      throw error;
    }
  }

  /**
   * Fetches snippet, contentDetails, and statistics for a single video.
   *
   * @param videoId YouTube video id.
   * @returns The raw videos.list API response (items may be empty).
   * @throws Re-throws any YouTube API error after logging it.
   */
  async getVideoDetails(videoId: string): Promise<youtube_v3.Schema$VideoListResponse> {
    try {
      const response = await this.youtube.videos.list({
        part: ['snippet', 'contentDetails', 'statistics'],
        id: [videoId]
      });
      return response.data;
    } catch (error) {
      console.error('Error getting video details:', error);
      throw error;
    }
  }

  /**
   * Fetches snippet and statistics for a single channel.
   *
   * @param channelId YouTube channel id.
   * @returns The raw channels.list API response (items may be empty).
   * @throws Re-throws any YouTube API error after logging it.
   */
  async getChannelDetails(channelId: string): Promise<youtube_v3.Schema$ChannelListResponse> {
    try {
      const response = await this.youtube.channels.list({
        part: ['snippet', 'statistics'],
        id: [channelId]
      });
      return response.data;
    } catch (error) {
      console.error('Error getting channel details:', error);
      throw error;
    }
  }

  /**
   * Fetches top-level comment threads for a video.
   *
   * @param videoId    YouTube video id.
   * @param maxResults Maximum number of threads (default 20).
   * @param options    order ('time' | 'relevance', default 'relevance'),
   *                   pagination token, and whether to include replies.
   * @returns The raw commentThreads.list API response.
   * @throws Re-throws any YouTube API error after logging it.
   */
  async getComments(
    videoId: string,
    maxResults: number = 20,
    options: {
      order?: 'time' | 'relevance';
      pageToken?: string;
      includeReplies?: boolean;
    } = {}
  ): Promise<youtube_v3.Schema$CommentThreadListResponse> {
    try {
      const { order = 'relevance', pageToken, includeReplies = false } = options;
      const response = await this.youtube.commentThreads.list({
        // Replies live in a separate part; request it only when asked.
        part: includeReplies ? ['snippet', 'replies'] : ['snippet'],
        videoId,
        maxResults,
        order,
        pageToken
      });
      return response.data;
    } catch (error) {
      console.error('Error getting comments:', error);
      throw error;
    }
  }

  /**
   * Fetches a video's caption track and applies optional processing.
   * Raw captions are cached per video+language; processing (time range,
   * search, segmenting) is applied on every call so a cached track can be
   * re-used with different options.
   *
   * @param videoId       YouTube video id.
   * @param langOrOptions Legacy language string, or a TranscriptOptions object.
   * @returns Processed transcript segments.
   * @throws TranscriptError when the caption track cannot be fetched.
   */
  async getTranscript(
    videoId: string,
    language?: string
  ): Promise<TranscriptSegment[]>;
  async getTranscript(
    videoId: string,
    options: TranscriptOptions
  ): Promise<TranscriptSegment[]>;
  async getTranscript(
    videoId: string,
    langOrOptions?: string | TranscriptOptions
  ): Promise<TranscriptSegment[]> {
    // Normalize options to support both legacy language string and new options object
    const options: TranscriptOptions = typeof langOrOptions === 'string'
      ? { language: langOrOptions }
      : langOrOptions || {};
    const cacheKey = this.generateTranscriptCacheKey(videoId, options);
    const cachedTranscript = this.transcriptCache.get<TranscriptSegment[]>(cacheKey);
    if (cachedTranscript) {
      return this.processTranscript(cachedTranscript, options);
    }
    try {
      const scraperOptions: { videoID: string; lang?: string } = { videoID: videoId };
      if (options.language) {
        scraperOptions.lang = options.language;
      }
      const captions = await getSubtitles(scraperOptions);
      // Cache the raw track; per-call processing happens below.
      this.transcriptCache.set(cacheKey, captions);
      return this.processTranscript(captions, options);
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
      console.error(`Error getting video transcript for ${videoId}:`, error);
      throw new TranscriptError({
        message: `Failed to fetch transcript: ${errorMessage}`,
        videoId,
        options,
        originalError: error instanceof Error ? error : new Error(errorMessage)
      });
    }
  }

  /**
   * Fetches transcripts for one or more videos and returns a single
   * processed, formatted transcript.
   *
   * Fixes vs. the previous version: raw transcripts are fetched with only
   * the language option so filtering/segmenting is applied exactly once to
   * the combined segments (it was previously applied per video and then
   * again on the combined result); video details are fetched only when
   * metadata was actually requested; and the fetches run in parallel.
   *
   * @param videoId One video id or an array of ids to concatenate.
   * @param options Transcript processing and formatting options.
   * @returns Formatted transcript with optional per-video metadata.
   */
  async getEnhancedTranscript(
    videoId: string | string[],
    options: TranscriptOptions = {}
  ): Promise<FormattedTranscript> {
    try {
      const videoIds = Array.isArray(videoId) ? videoId : [videoId];
      const [transcripts, videoDetails] = await Promise.all([
        Promise.all(
          videoIds.map(id => this.getTranscript(id, { language: options.language }))
        ),
        // Details are only used for metadata; skip the API calls otherwise.
        options.includeMetadata
          ? Promise.all(videoIds.map(id => this.getVideoDetails(id)))
          : Promise.resolve([] as youtube_v3.Schema$VideoListResponse[])
      ]);
      const combinedSegments: TranscriptSegment[] = [];
      transcripts.forEach((segments, index) => {
        // Tag each segment with its source video when combining several.
        if (videoIds.length > 1) {
          segments = segments.map(segment => ({
            ...segment,
            videoId: videoIds[index]
          }));
        }
        combinedSegments.push(...segments);
      });
      // Process once, on the combined segments.
      const processedTranscript = this.processTranscript(combinedSegments, options);
      return this.formatTranscript(processedTranscript, videoDetails, options);
    } catch (error) {
      console.error('Error getting enhanced transcript:', error);
      throw error;
    }
  }

  /**
   * Applies the optional time-range filter, text search, and segmenting
   * steps to a list of transcript segments, in that order.
   */
  private processTranscript(
    segments: TranscriptSegment[],
    options: TranscriptOptions
  ): TranscriptSegment[] {
    if (!segments.length) {
      return [];
    }
    let processedSegments = [...segments];
    if (options.timeRange) {
      processedSegments = this.filterByTimeRange(processedSegments, options.timeRange);
    }
    if (options.search) {
      processedSegments = this.filterBySearchText(processedSegments, options.search);
    }
    if (options.segment) {
      processedSegments = this.segmentTranscript(processedSegments, options.segment);
    }
    return processedSegments;
  }

  /**
   * Keeps only segments that fall inside [start, end] (seconds).
   * When end is omitted, everything from start onward is kept.
   * Fix: end is now tested with != null so an explicit end of 0 is honored
   * (previously a falsy end was silently ignored).
   */
  private filterByTimeRange(
    segments: TranscriptSegment[],
    timeRange: TimeRange
  ): TranscriptSegment[] {
    const { start = 0, end } = timeRange;
    return segments.filter(segment => {
      const segmentStart = segment.offset / 1000; // Convert to seconds
      const segmentEnd = (segment.offset + segment.duration) / 1000;
      if (end != null) {
        return segmentStart >= start && segmentEnd <= end;
      }
      return segmentStart >= start;
    });
  }

  /**
   * Keeps segments whose text contains the query (optionally case-sensitive),
   * plus up to contextLines neighbouring segments on each side of every match.
   *
   * @returns Matching segments in original order; empty array when nothing
   *          matches; the input unchanged when the query is blank.
   */
  private filterBySearchText(
    segments: TranscriptSegment[],
    search: SearchOptions
  ): TranscriptSegment[] {
    const { query, caseSensitive = false, contextLines = 0 } = search;
    if (!query || query.trim() === '') {
      return segments;
    }
    const matchedIndices: number[] = [];
    segments.forEach((segment, index) => {
      const text = caseSensitive ? segment.text : segment.text.toLowerCase();
      const searchText = caseSensitive ? query : query.toLowerCase();
      if (text.includes(searchText)) {
        matchedIndices.push(index);
      }
    });
    if (matchedIndices.length === 0) {
      return [];
    }
    // Expand each match with its surrounding context; a Set deduplicates
    // overlapping windows.
    const indicesWithContext = new Set<number>();
    matchedIndices.forEach(index => {
      indicesWithContext.add(index);
      for (let i = 1; i <= contextLines; i++) {
        if (index - i >= 0) {
          indicesWithContext.add(index - i);
        }
        if (index + i < segments.length) {
          indicesWithContext.add(index + i);
        }
      }
    });
    return Array.from(indicesWithContext)
      .sort((a, b) => a - b)
      .map(index => segments[index]);
  }

  /**
   * Groups segments into `count` chunks either by equal segment count
   * ('equal') or by roughly equal duration ('smart').
   *
   * NOTE(review): the grouped result is flattened before returning, so the
   * output currently has the same content and order as the input; the
   * grouping is a placeholder for future per-chunk output. Preserved as-is.
   */
  private segmentTranscript(
    segments: TranscriptSegment[],
    segmentOptions: { method: 'equal' | 'smart', count: number }
  ): TranscriptSegment[] {
    const { method = 'equal', count = 1 } = segmentOptions;
    if (count <= 1 || segments.length <= count) {
      return segments;
    }
    if (method === 'equal') {
      // Split into equal-sized runs of segments.
      const segmentSize = Math.ceil(segments.length / count);
      const result: TranscriptSegment[][] = [];
      for (let i = 0; i < segments.length; i += segmentSize) {
        result.push(segments.slice(i, i + segmentSize));
      }
      return result.flat();
    } else {
      // Smart segmentation: accumulate segments until each chunk reaches
      // an equal share of the total duration. A real implementation would
      // use NLP to find natural boundaries.
      const totalDuration = segments.reduce((sum, segment) => sum + segment.duration, 0);
      const durationPerSegment = totalDuration / count;
      const result: TranscriptSegment[][] = [];
      let currentSegment: TranscriptSegment[] = [];
      let currentDuration = 0;
      segments.forEach(segment => {
        currentSegment.push(segment);
        currentDuration += segment.duration;
        if (currentDuration >= durationPerSegment && result.length < count - 1) {
          result.push(currentSegment);
          currentSegment = [];
          currentDuration = 0;
        }
      });
      if (currentSegment.length > 0) {
        result.push(currentSegment);
      }
      return result.flat();
    }
  }

  /**
   * Builds the FormattedTranscript envelope: totals, optional per-video
   * metadata, and an optional rendered text body ('timestamped' or 'merged').
   */
  private formatTranscript(
    segments: TranscriptSegment[],
    videoDetails: youtube_v3.Schema$VideoListResponse[],
    options: TranscriptOptions
  ): FormattedTranscript {
    const { format = 'raw' } = options;
    const result: FormattedTranscript = {
      segments,
      totalSegments: segments.length,
      duration: segments.reduce((sum, segment) => sum + segment.duration, 0) / 1000, // in seconds
      format
    };
    if (options.includeMetadata) {
      result.metadata = videoDetails.map(details => {
        const video = details.items?.[0];
        if (!video) return null;
        return this.buildVideoMetadata(video);
      }).filter(Boolean);
    }
    if (format === 'timestamped') {
      result.text = segments.map(segment => {
        const startTime = this.formatTimestamp(segment.offset);
        return `[${startTime}] ${segment.text}`;
      }).join('\n');
    } else if (format === 'merged') {
      result.text = segments.map(segment => segment.text).join(' ');
    }
    return result;
  }

  /**
   * Extracts key moments from a transcript based on content analysis
   * @param videoId Video ID to analyze
   * @param maxMoments Maximum number of key moments to extract
   * @returns A formatted transcript with key moments and their timestamps
   */
  async getKeyMomentsTranscript(
    videoId: string,
    maxMoments: number = 5
  ): Promise<FormattedTranscript> {
    try {
      // Transcript and details are independent; fetch them in parallel.
      const [transcriptData, videoData] = await Promise.all([
        this.getTranscript(videoId),
        this.getVideoDetails(videoId)
      ]);
      const video = videoData.items?.[0];
      if (!transcriptData.length) {
        throw new Error('No transcript available for this video');
      }
      // Convert to paragraph chunks to better identify key moments
      const paragraphs: { text: string; startTime: number; endTime: number }[] = [];
      let currentParagraph = '';
      let startTime = 0;
      // Group segments into logical paragraphs (simple approach: group 5-8 segments together)
      const paragraphSize = Math.max(5, Math.min(8, Math.floor(transcriptData.length / 15)));
      for (let i = 0; i < transcriptData.length; i++) {
        const segment = transcriptData[i];
        if (i % paragraphSize === 0) {
          if (currentParagraph) {
            paragraphs.push({
              text: currentParagraph.trim(),
              startTime,
              endTime: segment.offset / 1000
            });
          }
          currentParagraph = segment.text;
          startTime = segment.offset / 1000;
        } else {
          currentParagraph += ' ' + segment.text;
        }
      }
      // Add the last paragraph
      if (currentParagraph) {
        const lastSegment = transcriptData[transcriptData.length - 1];
        paragraphs.push({
          text: currentParagraph.trim(),
          startTime,
          endTime: (lastSegment.offset + lastSegment.duration) / 1000
        });
      }
      // Identify key moments (simple heuristic: the longest paragraphs).
      // A real implementation would use NLP to identify important moments.
      const keyMoments = paragraphs
        .filter(p => p.text.length > 100) // Filter out short paragraphs
        .sort((a, b) => b.text.length - a.text.length) // Sort by length (simple heuristic)
        .slice(0, maxMoments); // Take only the top N moments
      // Create formatted output
      const title = video?.snippet?.title || 'Video Transcript';
      let formattedText = `# Key Moments in: ${title}\n\n`;
      keyMoments.forEach((moment, index) => {
        const timeFormatted = this.formatTimestamp(moment.startTime * 1000);
        formattedText += `## Key Moment ${index + 1} [${timeFormatted}]\n${moment.text}\n\n`;
      });
      // Add full transcript at the end
      formattedText += `\n# Full Transcript\n\n`;
      formattedText += transcriptData.map(segment =>
        `[${this.formatTimestamp(segment.offset)}] ${segment.text}`
      ).join('\n');
      return {
        segments: transcriptData,
        totalSegments: transcriptData.length,
        duration: (transcriptData[transcriptData.length - 1].offset +
                 transcriptData[transcriptData.length - 1].duration) / 1000,
        format: 'timestamped',
        text: formattedText,
        metadata: video ? [this.buildVideoMetadata(video)] : undefined
      };
    } catch (error) {
      console.error('Error getting key moments transcript:', error);
      throw error;
    }
  }

  /**
   * Divides a video transcript into segments and prepares it for segment-by-segment analysis
   * @param videoId Video ID to segment
   * @param segmentCount Number of segments to divide the transcript into
   * @returns A formatted transcript with segments marked by timestamps
   */
  async getSegmentedTranscript(
    videoId: string,
    segmentCount: number = 4
  ): Promise<FormattedTranscript> {
    try {
      // Transcript and details are independent; fetch them in parallel.
      const [transcriptData, videoData] = await Promise.all([
        this.getTranscript(videoId),
        this.getVideoDetails(videoId)
      ]);
      const video = videoData.items?.[0];
      if (!transcriptData.length) {
        throw new Error('No transcript available for this video');
      }
      // Calculate total duration
      const lastSegment = transcriptData[transcriptData.length - 1];
      const totalDuration = (lastSegment.offset + lastSegment.duration) / 1000; // in seconds
      // Calculate segment size
      const segmentDuration = totalDuration / segmentCount;
      const segments: {
        startTime: number;
        endTime: number;
        text: string;
        transcriptSegments: TranscriptSegment[];
      }[] = [];
      // Bucket transcript lines into equal-duration time windows.
      for (let i = 0; i < segmentCount; i++) {
        const startTime = i * segmentDuration;
        const endTime = (i + 1) * segmentDuration;
        // A line belongs to the window its start time falls into.
        const segmentTranscript = transcriptData.filter(segment => {
          const segmentStartTime = segment.offset / 1000;
          return segmentStartTime >= startTime && segmentStartTime < endTime;
        });
        if (segmentTranscript.length > 0) {
          segments.push({
            startTime,
            endTime,
            text: segmentTranscript.map(s => s.text).join(' '),
            transcriptSegments: segmentTranscript
          });
        }
      }
      // Create formatted output
      const title = video?.snippet?.title || 'Video Transcript';
      let formattedText = `# Segmented Transcript: ${title}\n\n`;
      segments.forEach((segment, index) => {
        const startTimeFormatted = this.formatTimestamp(segment.startTime * 1000);
        const endTimeFormatted = this.formatTimestamp(segment.endTime * 1000);
        formattedText += `## Segment ${index + 1} [${startTimeFormatted} - ${endTimeFormatted}]\n\n`;
        // Add transcript for this segment
        formattedText += segment.transcriptSegments.map(s =>
          `[${this.formatTimestamp(s.offset)}] ${s.text}`
        ).join('\n');
        formattedText += '\n\n';
      });
      return {
        segments: transcriptData,
        totalSegments: transcriptData.length,
        duration: totalDuration,
        format: 'timestamped',
        text: formattedText,
        metadata: video ? [this.buildVideoMetadata(video)] : undefined
      };
    } catch (error) {
      console.error('Error creating segmented transcript:', error);
      throw error;
    }
  }

  /**
   * Extracts the metadata fields this service exposes from a videos.list
   * item. Centralized here because three call sites previously duplicated
   * the same object literal.
   */
  private buildVideoMetadata(video: youtube_v3.Schema$Video) {
    return {
      id: video.id,
      title: video.snippet?.title,
      channelId: video.snippet?.channelId,
      channelTitle: video.snippet?.channelTitle,
      publishedAt: video.snippet?.publishedAt,
      duration: video.contentDetails?.duration,
      viewCount: video.statistics?.viewCount,
      likeCount: video.statistics?.likeCount
    };
  }

  /**
   * Renders a millisecond offset as m:ss, or h:mm:ss once the offset
   * reaches one hour. Fix: previously minutes were unbounded, so a
   * 90-minute mark rendered as "90:00" instead of "1:30:00".
   */
  private formatTimestamp(milliseconds: number): string {
    const totalSeconds = Math.floor(milliseconds / 1000);
    const hours = Math.floor(totalSeconds / 3600);
    const minutes = Math.floor((totalSeconds % 3600) / 60);
    const seconds = totalSeconds % 60;
    const ss = seconds.toString().padStart(2, '0');
    if (hours > 0) {
      return `${hours}:${minutes.toString().padStart(2, '0')}:${ss}`;
    }
    return `${minutes}:${ss}`;
  }

  /**
   * Cache key for a raw caption track. Only the language matters: all other
   * options are applied after the cached track is retrieved.
   */
  private generateTranscriptCacheKey(videoId: string, options: TranscriptOptions): string {
    const optionsString = JSON.stringify({
      language: options.language || 'default'
    });
    return `transcript_${videoId}_${optionsString}`;
  }
}