"""
Media handler - Extract audio from videos and transcribe using Whisper
Supports YouTube, Instagram Reels, and other video platforms
"""
import asyncio
import logging
import os
import tempfile
import threading

import whisper

from src.utils.llm_summarizer import summarize_with_llama3
from src.utils.youtube_extractor import get_youtube_text_info
from src.utils.link_detector import detect_link_type, LinkType
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Cached Whisper model. Stays None until get_whisper_model() loads it on
# first use; later calls reuse the same instance instead of reloading.
_whisper_model = None
def get_whisper_model():
    """Return the shared Whisper model, loading it on first use.

    The loaded model is kept in the module-level ``_whisper_model`` so
    that later calls reuse it instead of loading the ``"base"`` model again.
    """
    global _whisper_model

    # Fast path: an earlier call already populated the cache.
    if _whisper_model is not None:
        return _whisper_model

    logger.info("Loading Whisper model...")
    _whisper_model = whisper.load_model("base")
    logger.info("Whisper model loaded")
    return _whisper_model
async def download_audio_from_url(
    url: str, output_path: str, duration: int = 7, timeout: float = 10
) -> str:
    """
    Download audio from video URL using yt-dlp (first N seconds only)

    yt-dlp is asked to extract the audio as WAV, and ``-t <duration>`` is
    passed to its ffmpeg post-processor to limit the result to the first
    ``duration`` seconds.
    NOTE(review): the limit is applied during post-processing, so the full
    stream is presumably still downloaded first -- confirm, and consider
    yt-dlp's ``--download-sections`` if the timeout is hit on long videos.

    Args:
        url: Video URL. Passed after a ``--`` separator so a value starting
            with ``-`` cannot be parsed by yt-dlp as a command-line option.
        output_path: Base path to save audio file (without extension)
        duration: Duration in seconds to download (default: 7)
        timeout: Seconds to wait for yt-dlp to finish before it is killed
            (default: 10)
    Returns:
        str: Path to downloaded audio file
    Raises:
        Exception: If yt-dlp is not installed, times out, exits with a
            non-zero status, or no audio file can be found afterwards.
    """
    audio_extensions = ('.wav', '.mp3', '.m4a', '.opus')
    try:
        # yt-dlp will add extension automatically, so use output template
        output_template = f"{output_path}.%(ext)s"
        cmd = [
            'yt-dlp',
            '--extract-audio',
            '--audio-format', 'wav',
            '--audio-quality', '0',
            '--no-playlist',
            '--postprocessor-args', f'ffmpeg:-t {duration}',  # Limit to first N seconds
            '--output', output_template,
            # End of options: everything after this is treated as a URL,
            # even if it begins with "-".
            '--',
            url,
        ]
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            _stdout, stderr = await asyncio.wait_for(
                process.communicate(), timeout=timeout
            )
        except asyncio.TimeoutError:
            # Do not leave the child running once we stop waiting for it.
            process.kill()
            await process.wait()
            raise Exception(f"Video download timeout after {timeout} seconds")

        if process.returncode != 0:
            # errors="replace": undecodable bytes must not hide the real error.
            error_msg = stderr.decode(errors="replace") if stderr else "Unknown error"
            logger.error(f"yt-dlp error: {error_msg}")
            raise Exception(f"yt-dlp failed: {error_msg}")

        # yt-dlp adds extension, so look for .wav file
        audio_file = f"{output_path}.wav"
        if os.path.exists(audio_file):
            return audio_file

        # Fall back to any audio file sharing the requested base name.
        # A bare file name has an empty dirname; search the current directory.
        output_dir = os.path.dirname(output_path) or os.curdir
        base_name = os.path.basename(output_path)
        if os.path.isdir(output_dir):
            for entry in os.listdir(output_dir):
                if entry.startswith(base_name) and entry.endswith(audio_extensions):
                    return os.path.join(output_dir, entry)

        raise Exception("Audio file not found after download")
    except FileNotFoundError as e:
        raise Exception(
            "yt-dlp not found. Please install yt-dlp: pip install yt-dlp"
        ) from e
    except Exception as e:
        logger.error(f"Error downloading audio: {str(e)}")
        raise Exception(f"Error downloading audio: {str(e)}") from e
# Transcription runs in worker threads; this lock ensures only one thread
# loads or uses the shared Whisper model at a time.
_whisper_lock = threading.Lock()


def _transcribe_blocking(audio_path: str) -> dict:
    """Run the blocking Whisper transcription while holding the model lock."""
    with _whisper_lock:
        model = get_whisper_model()
        return model.transcribe(audio_path)


async def transcribe_audio_with_whisper(audio_path: str) -> str:
    """
    Transcribe audio file using Whisper

    The model load and the transcription are blocking calls, so they run in
    the event loop's default executor instead of directly in this coroutine;
    other tasks keep running while the audio is processed.

    Args:
        audio_path: Path to audio file
    Returns:
        str: Transcribed text, stripped of surrounding whitespace (empty if
            the result has no "text" entry)
    Raises:
        Exception: If loading the model or transcribing fails; the original
            error is attached as the cause.
    """
    try:
        logger.info(f"Transcribing audio: {audio_path}")
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, _transcribe_blocking, audio_path)
        return result.get("text", "").strip()
    except Exception as e:
        logger.error(f"Error transcribing audio: {str(e)}")
        raise Exception(f"Error transcribing audio: {str(e)}") from e
async def process_media_link(url: str, duration: int = 7) -> str:
    """
    Download the opening seconds of a media URL's audio and transcribe them.

    The audio is written into a temporary directory that is removed when
    the function exits, whether it returns or raises.

    Args:
        url: Media URL (YouTube, Instagram, etc.)
        duration: Duration in seconds to process (default: 7)
    Returns:
        str: Transcribed text
    Raises:
        Exception: Any error from the download or transcription step is
            logged here and re-raised unchanged.
    """
    with tempfile.TemporaryDirectory() as workdir:
        # The downloader appends the extension to this base path itself.
        target_base = os.path.join(workdir, "audio")
        try:
            logger.info(f"Downloading audio from: {url} (first {duration} seconds)")
            downloaded = await download_audio_from_url(url, target_base, duration)

            logger.info("Transcribing audio with Whisper...")
            return await transcribe_audio_with_whisper(downloaded)
        except Exception as err:
            logger.error(f"Error processing media link: {str(err)}")
            raise
async def summarize_media_link(url: str, duration: int = 7) -> str:
    """
    Summarize media link (YouTube, Instagram Reels, etc.)

    Collects every piece of text available for the video -- for YouTube
    links the title, description and subtitles, and for all links an audio
    transcription -- then sends the combined text to the Llama3 summarizer.

    Args:
        url: Media URL
        duration: Number of leading seconds of subtitles/audio to use
            (default: 7)
    Returns:
        str: Summary of the collected text. If audio transcription fails and
            nothing else was collected, an "Error processing video: ..."
            message; if nothing was collected at all, a fixed "no text
            information" message.
    """
    # Collect all text information
    text_parts = []

    # For YouTube videos, extract metadata and subtitles
    link_type = detect_link_type(url)
    if link_type == LinkType.YOUTUBE:
        logger.info("Extracting YouTube metadata and subtitles...")
        title, description, subtitles = await get_youtube_text_info(url, duration)
        if title:
            text_parts.append(f"Title: {title}")
        if description:
            # Limit description length
            desc_text = description[:500] + "..." if len(description) > 500 else description
            text_parts.append(f"Description: {desc_text}")
        if subtitles:
            text_parts.append(f"Subtitles (first {duration}s): {subtitles}")

    # Always try to get audio transcription. This is best-effort: a failure
    # only aborts the summary when no other text was collected.
    logger.info(f"Processing audio transcription (first {duration} seconds)...")
    try:
        transcript = await process_media_link(url, duration)
    except Exception as e:
        logger.warning(f"Audio transcription failed: {str(e)}")
        if not text_parts:
            return f"Error processing video: {str(e)}"
    else:
        if transcript and transcript.strip():
            text_parts.append(f"Audio Transcription (first {duration}s): {transcript}")

    # Combine all text information
    if not text_parts:
        logger.warning("No text information collected from the video")
        return "No text information available from the video."

    combined_text = "\n\n".join(text_parts)
    logger.info(f"Combined text information - Total length: {len(combined_text)} characters")
    logger.debug(f"Combined text preview: {combined_text[:300]}...")

    # Summarize using Llama3 with video-specific prompts
    logger.info("Starting Llama3 summarization process...")
    logger.info(f"Sending combined text to Llama3 for summarization (length: {len(combined_text)} chars)")
    summary = await summarize_with_llama3(combined_text, content_type="video")
    logger.info(f"Summarization process completed - Final summary length: {len(summary)} characters")
    return summary