Skip to main content
Glama

MCP Toolbox

by ai-zerolab
"""XiaoyuZhouFM podcast crawler tools."""

import contextlib
import os
import re
from typing import Any
from urllib.parse import urlparse

import httpx
from loguru import logger

from mcp_toolbox.app import mcp
from mcp_toolbox.config import Config

# Extension used when the audio URL's filename carries no usable suffix.
_DEFAULT_AUDIO_EXTENSION = "m4a"

# Filename stem used when the episode URL has no ID segment after the prefix.
_DEFAULT_EPISODE_ID = "episode"

# Only episode pages under this prefix are accepted by the tool.
_EPISODE_URL_PREFIX = "https://www.xiaoyuzhoufm.com/episode/"

# Matches the Open Graph audio meta tag; group 1 is the audio URL.
_OG_AUDIO_PATTERN = re.compile(r'<meta\s+property="og:audio"\s+content="([^"]+)"')


def _episode_id_from_url(episode_url: str) -> str:
    """Return the last path segment of an episode URL, ignoring query/fragment.

    Falls back to ``"episode"`` when no segment follows ``/episode/``.
    """
    tail = urlparse(episode_url).path.removeprefix("/episode/")
    # Strip trailing slashes so ".../episode/<id>/" still yields "<id>".
    episode_id = tail.rstrip("/").rsplit("/", 1)[-1]
    return episode_id or _DEFAULT_EPISODE_ID


def _extension_from_url(audio_url: str) -> str:
    """Return the file extension of the URL's filename, ignoring query/fragment.

    Falls back to ``"m4a"`` when the filename has no dot or nothing after it.
    """
    filename = urlparse(audio_url).path.rsplit("/", 1)[-1]
    _, dot, extension = filename.rpartition(".")
    return extension if dot and extension else _DEFAULT_AUDIO_EXTENSION


class XiaoyuZhouFMCrawler:
    """XiaoyuZhouFM podcast crawler."""

    def __init__(self):
        """Initialize the crawler."""
        self.config = Config()

    async def extract_audio_url(self, url: str) -> str:
        """Extract audio URL from XiaoyuZhouFM episode page.

        Args:
            url: The XiaoyuZhouFM episode URL

        Returns:
            The audio URL taken from the page's ``og:audio`` meta tag

        Raises:
            ValueError: If the page cannot be fetched or the audio URL
                cannot be found
        """
        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(url)
                response.raise_for_status()
            except httpx.HTTPStatusError as e:
                raise ValueError(f"HTTP error: {e.response.status_code} - {e.response.reason_phrase}") from e
            except httpx.RequestError as e:
                raise ValueError(f"Request error: {e}") from e

        match = _OG_AUDIO_PATTERN.search(response.text)
        if not match:
            raise ValueError("Could not find audio URL in the page")
        return match.group(1)

    async def download_audio(self, audio_url: str, output_path: str) -> str:
        """Download audio file from URL.

        The response is streamed to ``<output_path>.part`` and renamed to
        ``output_path`` only after the transfer completes, so a failed
        download never leaves a truncated file at ``output_path``.

        Args:
            audio_url: The audio file URL
            output_path: The path to save the audio file

        Returns:
            The path to the downloaded file

        Raises:
            ValueError: If the download fails
            OSError: If the output directory cannot be created
        """
        # Create directory if it doesn't exist
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        partial_path = f"{output_path}.part"

        # httpx does not follow redirects unless asked to; media hosts
        # commonly redirect to a CDN node.
        async with httpx.AsyncClient(follow_redirects=True) as client:
            try:
                logger.info(f"Downloading audio from {audio_url}")
                # Stream in chunks instead of buffering the whole file in memory.
                async with client.stream("GET", audio_url) as response:
                    response.raise_for_status()
                    with open(partial_path, "wb") as f:
                        async for chunk in response.aiter_bytes():
                            f.write(chunk)
                os.replace(partial_path, output_path)
                logger.info(f"Audio saved to {output_path}")
                return output_path
            except httpx.HTTPStatusError as e:
                raise ValueError(f"HTTP error: {e.response.status_code} - {e.response.reason_phrase}") from e
            except httpx.RequestError as e:
                raise ValueError(f"Request error: {e}") from e
            except OSError as e:
                raise ValueError(f"IO error: {e}") from e
            finally:
                # On success the partial file has already been renamed away;
                # on failure this removes whatever was written so far.
                with contextlib.suppress(OSError):
                    os.remove(partial_path)


# Initialize crawler
crawler = XiaoyuZhouFMCrawler()


@mcp.tool(
    description="Crawl and download a podcast episode from XiaoyuZhouFM. Args: xiaoyuzhoufm_url (required, The URL of the XiaoyuZhouFM episode), output_dir (required, The directory to save the audio file)"
)
async def xiaoyuzhoufm_download(xiaoyuzhoufm_url: str, output_dir: str) -> dict[str, Any]:
    """Crawl and download a podcast episode from XiaoyuZhouFM.

    Args:
        xiaoyuzhoufm_url: The URL of the XiaoyuZhouFM episode
        output_dir: The directory to save the audio file

    Returns:
        A dictionary containing the audio URL and the path to the downloaded
        file on success, or ``error`` and ``message`` keys on failure
    """
    try:
        # Validate URL
        if not xiaoyuzhoufm_url.startswith(_EPISODE_URL_PREFIX):
            raise ValueError("Invalid XiaoyuZhouFM URL. URL should start with 'https://www.xiaoyuzhoufm.com/episode/'")

        # Derive the filename from URL paths only, so query strings and
        # fragments never end up in the saved file's name.
        episode_id = _episode_id_from_url(xiaoyuzhoufm_url)
        audio_url = await crawler.extract_audio_url(xiaoyuzhoufm_url)
        file_extension = _extension_from_url(audio_url)

        # Create output path with episode ID as filename
        output_path = os.path.join(output_dir, f"{episode_id}.{file_extension}")

        # Download audio
        downloaded_path = await crawler.download_audio(audio_url, output_path)

        return {
            "audio_url": audio_url,
            "downloaded_path": downloaded_path,
            "message": f"Successfully downloaded podcast to {downloaded_path}",
        }
    except Exception as e:
        # Tool boundary: report every failure to the caller as a result dict
        # rather than letting the exception escape.
        return {
            "error": str(e),
            "message": f"Failed to download podcast: {e!s}",
        }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ai-zerolab/mcp-toolbox'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.