Link Scan MCP Server

media_handler.py•7.46 KiB

"""
Media handler - Extract audio from videos and transcribe using Whisper.

Supports YouTube, Instagram Reels, and other video platforms.
"""

import asyncio
import logging
import os
import tempfile
import threading

import whisper

from src.utils.llm_summarizer import summarize_with_llama3
from src.utils.youtube_extractor import get_youtube_text_info
from src.utils.link_detector import detect_link_type, LinkType

logger = logging.getLogger(__name__)

# Default number of seconds to wait for yt-dlp before killing it.
DOWNLOAD_TIMEOUT_SECONDS = 10

# Extensions accepted when searching for the file yt-dlp produced.
_AUDIO_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.opus')

# Whisper model (load once, reuse)
_whisper_model = None

# Transcription runs in worker threads (see transcribe_audio_with_whisper),
# so the lazy load must be guarded against two threads loading at once.
_model_load_lock = threading.Lock()

# The shared model instance is not assumed to be safe for concurrent use,
# so transcription calls are serialized.
_transcribe_lock = threading.Lock()


def get_whisper_model():
    """
    Get or load Whisper model (singleton)

    The "base" model is loaded on first use and cached in a module-level
    variable; later calls return the cached instance.

    Returns:
        The loaded Whisper model object.
    """
    global _whisper_model
    if _whisper_model is None:
        with _model_load_lock:
            # Re-check under the lock: another thread may have finished
            # loading while this one was waiting.
            if _whisper_model is None:
                logger.info("Loading Whisper model...")
                _whisper_model = whisper.load_model("base")
                logger.info("Whisper model loaded")
    return _whisper_model


def _find_downloaded_audio(output_path: str) -> str:
    """Return the path of the audio file yt-dlp wrote for *output_path*."""
    # yt-dlp adds extension, so look for .wav file
    audio_file = f"{output_path}.wav"
    if os.path.exists(audio_file):
        return audio_file

    # Try to find any audio file in the directory. A bare file name has an
    # empty dirname, which means the current directory.
    output_dir = os.path.dirname(output_path) or "."
    base_name = os.path.basename(output_path)
    if os.path.exists(output_dir):
        for file_name in sorted(os.listdir(output_dir)):
            if file_name.startswith(base_name) and file_name.endswith(_AUDIO_EXTENSIONS):
                return os.path.join(output_dir, file_name)

    raise Exception("Audio file not found after download")


async def download_audio_from_url(
    url: str,
    output_path: str,
    duration: int = 7,
    *,
    timeout: float = DOWNLOAD_TIMEOUT_SECONDS,
) -> str:
    """
    Download audio from video URL using yt-dlp (first N seconds only)

    Args:
        url: Video URL
        output_path: Base path to save audio file (without extension)
        duration: Duration in seconds to download (default: 7)
        timeout: Seconds to wait for yt-dlp before killing it (default: 10)

    Returns:
        str: Path to downloaded audio file

    Raises:
        Exception: If yt-dlp is not installed, times out, exits with a
            non-zero status, or no audio file can be found afterwards.
    """
    try:
        # Use yt-dlp to download audio (first N seconds only)
        # yt-dlp will add extension automatically, so use output template
        output_template = f"{output_path}.%(ext)s"
        cmd = [
            'yt-dlp',
            '--extract-audio',
            '--audio-format', 'wav',
            '--audio-quality', '0',
            '--no-playlist',
            '--postprocessor-args', f'ffmpeg:-t {duration}',  # Limit to first N seconds
            '--output', output_template,
            # End of options: the URL comes from outside, and without this a
            # value starting with "-" would be parsed as a yt-dlp option.
            '--',
            url,
        ]

        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        try:
            _stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
        except asyncio.TimeoutError:
            # Do not leave the child running once we stop waiting for it.
            process.kill()
            await process.wait()
            raise Exception(f"Video download timeout after {timeout} seconds")

        if process.returncode != 0:
            # errors="replace": tool output is not guaranteed to be valid
            # UTF-8 and a decode failure would hide the real error.
            error_msg = stderr.decode(errors="replace") if stderr else "Unknown error"
            logger.error("yt-dlp error: %s", error_msg)
            raise Exception(f"yt-dlp failed: {error_msg}")

        return _find_downloaded_audio(output_path)

    except FileNotFoundError as e:
        # Raised by create_subprocess_exec when the executable is missing.
        raise Exception("yt-dlp not found. Please install yt-dlp: pip install yt-dlp") from e
    except Exception as e:
        logger.error("Error downloading audio: %s", e)
        raise Exception(f"Error downloading audio: {str(e)}") from e


def _transcribe_sync(audio_path: str) -> str:
    """Blocking transcription of *audio_path*; intended to run in a worker thread."""
    model = get_whisper_model()

    # Transcribe using Whisper
    logger.info("Transcribing audio: %s", audio_path)
    with _transcribe_lock:
        result = model.transcribe(audio_path)
    return result.get("text", "").strip()


async def transcribe_audio_with_whisper(audio_path: str) -> str:
    """
    Transcribe audio file using Whisper

    Model loading and transcription are blocking calls, so they run in the
    event loop's default executor instead of on the event loop thread.

    Args:
        audio_path: Path to audio file

    Returns:
        str: Transcribed text

    Raises:
        Exception: If loading the model or transcribing fails.
    """
    try:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _transcribe_sync, audio_path)
    except Exception as e:
        logger.error("Error transcribing audio: %s", e)
        raise Exception(f"Error transcribing audio: {str(e)}") from e


async def process_media_link(url: str, duration: int = 7) -> str:
    """
    Process media link: download audio and transcribe using Whisper (first N seconds only)

    Args:
        url: Media URL (YouTube, Instagram, etc.)
        duration: Duration in seconds to process (default: 7)

    Returns:
        str: Transcribed text

    Raises:
        Exception: Re-raised from the download or transcription step.
    """
    # Create temporary directory for audio files; it is removed on exit,
    # including when an error is raised.
    with tempfile.TemporaryDirectory() as temp_dir:
        audio_base_path = os.path.join(temp_dir, "audio")

        try:
            # Download audio (first N seconds only)
            logger.info("Downloading audio from: %s (first %s seconds)", url, duration)
            audio_path = await download_audio_from_url(url, audio_base_path, duration)

            # Transcribe using Whisper
            logger.info("Transcribing audio with Whisper...")
            return await transcribe_audio_with_whisper(audio_path)

        except Exception as e:
            logger.error("Error processing media link: %s", e)
            raise


async def summarize_media_link(url: str) -> str:
    """
    Summarize media link (YouTube, Instagram Reels, etc.)
    Includes title, description, subtitles, and audio transcription

    Args:
        url: Media URL

    Returns:
        str: Summarized text from video with all text information, or a
            short message when no text could be collected.
    """
    duration = 7  # Process first 7 seconds only

    # Collect all text information
    text_parts = []

    # For YouTube videos, extract metadata and subtitles
    link_type = detect_link_type(url)
    if link_type == LinkType.YOUTUBE:
        logger.info("Extracting YouTube metadata and subtitles...")
        title, description, subtitles = await get_youtube_text_info(url, duration)

        if title:
            text_parts.append(f"Title: {title}")
        if description:
            # Limit description length
            desc_text = description[:500] + "..." if len(description) > 500 else description
            text_parts.append(f"Description: {desc_text}")
        if subtitles:
            text_parts.append(f"Subtitles (first {duration}s): {subtitles}")

    # Always try to get audio transcription
    logger.info("Processing audio transcription (first %s seconds)...", duration)
    try:
        transcript = await process_media_link(url, duration)
        if transcript and transcript.strip():
            text_parts.append(f"Audio Transcription (first {duration}s): {transcript}")
    except Exception as e:
        # Best effort: metadata alone may still be enough to summarize.
        logger.warning("Audio transcription failed: %s", e)
        if not text_parts:
            # Nothing else was collected, so report the failure to the caller.
            return f"Error processing video: {str(e)}"

    # Combine all text information
    if not text_parts:
        logger.warning("No text information collected from the video")
        return "No text information available from the video."

    combined_text = "\n\n".join(text_parts)
    logger.info("Combined text information - Total length: %d characters", len(combined_text))
    logger.debug("Combined text preview: %s...", combined_text[:300])

    # Summarize using Llama3 with video-specific prompts
    logger.info("Starting Llama3 summarization process...")
    logger.info(
        "Sending combined text to Llama3 for summarization (length: %d chars)",
        len(combined_text),
    )
    summary = await summarize_with_llama3(combined_text, content_type="video")
    logger.info(
        "Summarization process completed - Final summary length: %d characters",
        len(summary),
    )

    return summary

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/chweyun/mcp-link-scan'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

media_handler.py•7.46 KiB