MCP Toolbox

Apache 2.0

Overview | Inspect (New) | Schema | Related Servers | Reviews | Score

"""XiaoyuZhouFM podcast crawler tools."""

import html
import os
import re
from typing import Any
from urllib.parse import urlparse

import httpx
from loguru import logger

from mcp_toolbox.app import mcp
from mcp_toolbox.config import Config


class XiaoyuZhouFMCrawler:
    """XiaoyuZhouFM podcast crawler.

    Resolves the audio file URL advertised by an episode page's ``og:audio``
    meta tag and downloads that file to disk.
    """

    # Any <meta ...> tag. Tags are matched first and then inspected one by one
    # so that the order of attributes inside the tag does not matter.
    _META_TAG_RE = re.compile(r"<meta\b[^>]*>", re.IGNORECASE)
    # property="og:audio" exactly (not og:audio:type etc.), either quote style.
    _OG_AUDIO_RE = re.compile(r"""(?<![\w-])property\s*=\s*(["'])og:audio\1""", re.IGNORECASE)
    # content="..." with a non-empty value, either quote style.
    _CONTENT_RE = re.compile(r"""(?<![\w-])content\s*=\s*(["'])(.+?)\1""", re.IGNORECASE | re.DOTALL)
    # Number of bytes requested per chunk while streaming a download to disk.
    _CHUNK_SIZE = 64 * 1024

    def __init__(self):
        """Initialize the crawler."""
        self.config = Config()

    @classmethod
    def _parse_audio_url(cls, html_content: str) -> str:
        """Return the URL held by the ``og:audio`` meta tag in *html_content*.

        Args:
            html_content: The HTML source of an episode page

        Returns:
            The audio URL, with HTML entities (e.g. ``&amp;``) decoded

        Raises:
            ValueError: If no ``og:audio`` meta tag with a non-empty content
                attribute is present
        """
        for tag_match in cls._META_TAG_RE.finditer(html_content):
            tag = tag_match.group(0)
            if not cls._OG_AUDIO_RE.search(tag):
                continue
            content_match = cls._CONTENT_RE.search(tag)
            if not content_match:
                continue
            # Attribute values are entity-encoded in HTML; decode so that a
            # query string like "?a=1&amp;b=2" becomes a usable URL.
            audio_url = html.unescape(content_match.group(2)).strip()
            if audio_url:
                return audio_url

        raise ValueError("Could not find audio URL in the page")

    async def extract_audio_url(self, url: str) -> str:
        """Extract audio URL from XiaoyuZhouFM episode page.

        Args:
            url: The XiaoyuZhouFM episode URL

        Returns:
            The audio URL

        Raises:
            ValueError: If the page cannot be fetched or the audio URL cannot
                be found in it
        """
        # httpx does not follow redirects unless asked to, and
        # raise_for_status() would otherwise reject a 3xx response.
        async with httpx.AsyncClient(follow_redirects=True) as client:
            try:
                response = await client.get(url)
                response.raise_for_status()
            except httpx.HTTPStatusError as e:
                raise ValueError(f"HTTP error: {e.response.status_code} - {e.response.reason_phrase}") from e
            except httpx.RequestError as e:
                raise ValueError(f"Request error: {e}") from e

            return self._parse_audio_url(response.text)

    async def download_audio(self, audio_url: str, output_path: str) -> str:
        """Download audio file from URL.

        The response body is streamed to ``<output_path>.part`` in chunks and
        renamed to ``output_path`` only after the transfer completes, so the
        whole file is never held in memory and a failed download does not
        leave a truncated file at ``output_path``.

        Args:
            audio_url: The audio file URL
            output_path: The path to save the audio file

        Returns:
            The path to the downloaded file

        Raises:
            ValueError: If the download fails
        """
        # Create directory if it doesn't exist
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        partial_path = f"{output_path}.part"

        async with httpx.AsyncClient(follow_redirects=True) as client:
            try:
                logger.info(f"Downloading audio from {audio_url}")
                async with client.stream("GET", audio_url) as response:
                    response.raise_for_status()
                    with open(partial_path, "wb") as f:
                        async for chunk in response.aiter_bytes(self._CHUNK_SIZE):
                            f.write(chunk)

                # Move the finished file into place in a single step.
                os.replace(partial_path, output_path)

                logger.info(f"Audio saved to {output_path}")
                return output_path

            except httpx.HTTPStatusError as e:
                raise ValueError(f"HTTP error: {e.response.status_code} - {e.response.reason_phrase}") from e
            except httpx.RequestError as e:
                raise ValueError(f"Request error: {e}") from e
            except OSError as e:
                raise ValueError(f"IO error: {e}") from e
            finally:
                # After a successful rename the partial file no longer exists;
                # it is only still present when something failed mid-transfer.
                if os.path.exists(partial_path):
                    try:
                        os.remove(partial_path)
                    except OSError as cleanup_error:
                        # Best-effort cleanup: report it, but do not mask the
                        # error that caused the download to fail.
                        logger.warning(f"Could not remove partial file {partial_path}: {cleanup_error}")


# Module-level crawler instance, created at import time and used by the
# xiaoyuzhoufm_download tool below.
crawler = XiaoyuZhouFMCrawler()


def _last_path_segment(url: str) -> str:
    """Return the last path segment of *url*, ignoring query string, fragment and trailing slashes."""
    return urlparse(url).path.rstrip("/").rsplit("/", 1)[-1]


@mcp.tool(
    description="Crawl and download a podcast episode from XiaoyuZhouFM. Args: xiaoyuzhoufm_url (required, The URL of the XiaoyuZhouFM episode), output_dir (required, The directory to save the audio file)"
)
async def xiaoyuzhoufm_download(xiaoyuzhoufm_url: str, output_dir: str) -> dict[str, Any]:
    """Crawl and download a podcast episode from XiaoyuZhouFM.

    The file is saved as ``<episode id>.<extension>`` inside ``output_dir``.
    Both parts are taken from the URL *path* only, so a query string or
    fragment on the episode URL or on the audio URL never ends up in the
    filename. The extension falls back to ``m4a`` when the audio URL does not
    carry an alphanumeric one.

    Args:
        xiaoyuzhoufm_url: The URL of the XiaoyuZhouFM episode
        output_dir: The directory to save the audio file

    Returns:
        A dictionary containing the audio URL and the path to the downloaded file,
        or a dictionary with ``error`` and ``message`` keys if anything fails
    """
    try:
        # Validate URL
        if not xiaoyuzhoufm_url.startswith("https://www.xiaoyuzhoufm.com/episode/"):
            raise ValueError("Invalid XiaoyuZhouFM URL. URL should start with 'https://www.xiaoyuzhoufm.com/episode/'")

        # Extract episode ID from the URL path (not the raw string, which may
        # carry "?query" or "#fragment" suffixes).
        episode_id = _last_path_segment(xiaoyuzhoufm_url) or "episode"

        # Extract audio URL
        audio_url = await crawler.extract_audio_url(xiaoyuzhoufm_url)

        # Determine file extension from the audio URL path; keep the default
        # unless the candidate is a plain alphanumeric suffix.
        file_extension = "m4a"
        _, dot, candidate = _last_path_segment(audio_url).rpartition(".")
        if dot and candidate.isalnum():
            file_extension = candidate

        # Create output path with episode ID as filename
        output_path = os.path.join(output_dir, f"{episode_id}.{file_extension}")

        # Download audio
        downloaded_path = await crawler.download_audio(audio_url, output_path)

        return {
            "audio_url": audio_url,
            "downloaded_path": downloaded_path,
            "message": f"Successfully downloaded podcast to {downloaded_path}",
        }
    except Exception as e:
        # Tool boundary: report the failure to the caller as data instead of
        # raising, but keep a log record so the cause is not lost.
        logger.error(f"Failed to download podcast from {xiaoyuzhoufm_url}: {e!s}")
        return {
            "error": str(e),
            "message": f"Failed to download podcast: {e!s}",
        }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ai-zerolab/mcp-toolbox'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.