Obsidian MCP Server

Overview Schema Related Servers Score Discussions

youtube_logic.py•3.93 KiB

"""
Core business logic for YouTube tools.

This module handles video ID extraction and fetching using the Result pattern.
"""

import re
from typing import Optional
from urllib.parse import parse_qs, urlparse

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound
from youtube_transcript_api.formatters import TextFormatter

from ..result import Result


def extract_video_id(url: str) -> str:
    """
    Extract YouTube video ID from various URL formats.

    Returns:
        Video ID string or empty string if not found.
    """
    # Common YouTube URL patterns
    patterns = [
        r"(?:v=|\/)([0-9A-Za-z_-]{11}).*",
        r"(?:youtu\.be\/)([0-9A-Za-z_-]{11})",
        r"(?:embed\/)([0-9A-Za-z_-]{11})",
    ]

    # Try standard URL parsing first
    parsed_url = urlparse(url)
    if parsed_url.hostname in ("www.youtube.com", "youtube.com"):
        if parsed_url.path == "/watch":
            params = parse_qs(parsed_url.query)
            if "v" in params:
                return params["v"][0]
        if parsed_url.path.startswith("/shorts/"):
            return parsed_url.path.split("/shorts/")[1]

    # Try regex for other cases (youtu.be, embeds, etc)
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)

    # If input looks like an ID (11 chars)
    if len(url) == 11 and re.match(r"^[0-9A-Za-z_-]{11}$", url):
        return url

    return ""


def get_transcript_text(url: str, language: Optional[str] = None) -> Result[str]:
    """
    Get the transcript of a YouTube video.

    Args:
        url: YouTube video URL or ID.
        language: Preferred language code. If None, attemps to detect original logic.

    Returns:
        Result with full transcript text or error message.
    """
    video_id = extract_video_id(url)
    if not video_id:
        return Result.fail(
            "No se pudo extraer un ID de video válido de la URL proporcionada."
        )

    try:
        # Instantiate API
        api = YouTubeTranscriptApi()

        # Get available transcripts list
        transcript_list = api.list(video_id)

        target_transcript = None

        if language:
            # If language specified, try to find it
            try:
                target_transcript = transcript_list.find_transcript([language])
            except NoTranscriptFound:
                # Fallback: will try other languages below
                target_transcript = None
        else:
            # Auto logic: Prioritize manual, then generated
            # 1. Search for manuals (is_generated=False)
            for t in transcript_list:
                if not t.is_generated:
                    target_transcript = t
                    break

            # 2. If no manual, use first available (likely generated)
            if not target_transcript:
                for t in transcript_list:
                    target_transcript = t
                    break

        if not target_transcript:
            # Fallback logic
            languages = ["es", "en"] if not language else [language]
            target_transcript = transcript_list.find_transcript(languages)

        # Fetch transcript
        transcript_data = target_transcript.fetch()

        # Format to plain text
        formatter = TextFormatter()
        text_formatted = formatter.format_transcript(transcript_data)

        metadata = (
            f"Idioma: {target_transcript.language} ({target_transcript.language_code})"
        )
        if target_transcript.is_generated:
            metadata += " [Autogenerado]"
        else:
            metadata += " [Manual]"

        return Result.ok(
            f"✅ Transcripción obtenida para video {video_id}\n"
            f"ℹ️ {metadata}:\n\n{text_formatted}"
        )

    except Exception as e:  # pylint: disable=broad-exception-caught
        return Result.fail(f"Error al obtener transcripción para {video_id}: {e}")

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Vasallo94/obsidian-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

youtube_logic.py•3.93 KiB

"""
Core business logic for YouTube tools.

This module handles video ID extraction and fetching using the Result pattern.
"""

import re
from typing import Optional
from urllib.parse import parse_qs, urlparse

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound
from youtube_transcript_api.formatters import TextFormatter

from ..result import Result


def extract_video_id(url: str) -> str:
    """
    Extract YouTube video ID from various URL formats.

    Returns:
        Video ID string or empty string if not found.
    """
    # Common YouTube URL patterns
    patterns = [
        r"(?:v=|\/)([0-9A-Za-z_-]{11}).*",
        r"(?:youtu\.be\/)([0-9A-Za-z_-]{11})",
        r"(?:embed\/)([0-9A-Za-z_-]{11})",
    ]

    # Try standard URL parsing first
    parsed_url = urlparse(url)
    if parsed_url.hostname in ("www.youtube.com", "youtube.com"):
        if parsed_url.path == "/watch":
            params = parse_qs(parsed_url.query)
            if "v" in params:
                return params["v"][0]
        if parsed_url.path.startswith("/shorts/"):
            return parsed_url.path.split("/shorts/")[1]

    # Try regex for other cases (youtu.be, embeds, etc)
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)

    # If input looks like an ID (11 chars)
    if len(url) == 11 and re.match(r"^[0-9A-Za-z_-]{11}$", url):
        return url

    return ""


def get_transcript_text(url: str, language: Optional[str] = None) -> Result[str]:
    """
    Get the transcript of a YouTube video.

    Args:
        url: YouTube video URL or ID.
        language: Preferred language code. If None, attemps to detect original logic.

    Returns:
        Result with full transcript text or error message.
    """
    video_id = extract_video_id(url)
    if not video_id:
        return Result.fail(
            "No se pudo extraer un ID de video válido de la URL proporcionada."
        )

    try:
        # Instantiate API
        api = YouTubeTranscriptApi()

        # Get available transcripts list
        transcript_list = api.list(video_id)

        target_transcript = None

        if language:
            # If language specified, try to find it
            try:
                target_transcript = transcript_list.find_transcript([language])
            except NoTranscriptFound:
                # Fallback: will try other languages below
                target_transcript = None
        else:
            # Auto logic: Prioritize manual, then generated
            # 1. Search for manuals (is_generated=False)
            for t in transcript_list:
                if not t.is_generated:
                    target_transcript = t
                    break

            # 2. If no manual, use first available (likely generated)
            if not target_transcript:
                for t in transcript_list:
                    target_transcript = t
                    break

        if not target_transcript:
            # Fallback logic
            languages = ["es", "en"] if not language else [language]
            target_transcript = transcript_list.find_transcript(languages)

        # Fetch transcript
        transcript_data = target_transcript.fetch()

        # Format to plain text
        formatter = TextFormatter()
        text_formatted = formatter.format_transcript(transcript_data)

        metadata = (
            f"Idioma: {target_transcript.language} ({target_transcript.language_code})"
        )
        if target_transcript.is_generated:
            metadata += " [Autogenerado]"
        else:
            metadata += " [Manual]"

        return Result.ok(
            f"✅ Transcripción obtenida para video {video_id}\n"
            f"ℹ️ {metadata}:\n\n{text_formatted}"
        )

    except Exception as e:  # pylint: disable=broad-exception-caught
        return Result.fail(f"Error al obtener transcripción para {video_id}: {e}")