Skip to main content
Glama

Short Video MCP Server

by yangbuyiya
MIT License
38
redbook.py (3.08 kB)
import re

import fake_useragent
import httpx
import yaml

from .base import BaseParser, ImgInfo, VideoAuthor, VideoInfo

# Captures whatever is assigned to ``window.__INITIAL_STATE__`` up to the
# closing </script> tag. Compiled once at import time rather than per call.
_INITIAL_STATE_RE = re.compile(
    r"window\.__INITIAL_STATE__\s*=\s*(.*?)</script>",
    flags=re.DOTALL,
)

# Prefix used when rebuilding image URLs from the bare image id.
_IMAGE_URL_PREFIX = "https://ci.xiaohongshu.com/notes_pre_post/"


def _first_master_url(streams) -> str:
    """Return ``masterUrl`` of the first stream entry, or "" if there is none."""
    if not streams:
        return ""
    return streams[0].get("masterUrl") or ""


def _build_image_info(img_item: dict) -> ImgInfo:
    """Build the ImgInfo for one ``imageList`` entry, with live-photo URL if any."""
    default_url = img_item["urlDefault"]
    # Some images carry a watermark, so the URL is rebuilt on another host
    # from the image id (last path segment, without any "!..." suffix).
    image_id = default_url.split("/")[-1].split("!")[0]
    # When the original link contains "spectrum", the rebuilt URL must keep
    # the "spectrum/" path segment.
    spectrum_prefix = "spectrum/" if "spectrum" in default_url else ""
    img_info = ImgInfo(
        url=f"{_IMAGE_URL_PREFIX}{spectrum_prefix}{image_id}?imageView2/format/jpg"
    )

    # Live photos additionally expose a short video stream.
    if img_item.get("livePhoto", False):
        live_url = _first_master_url(img_item.get("stream", {}).get("h264", []))
        if live_url:
            img_info.live_photo_url = live_url
    return img_info


class RedBook(BaseParser):
    """Parser for Xiaohongshu (RedBook) share links."""

    async def parse_share_url(self, share_url: str) -> VideoInfo:
        """Fetch a share page and extract the note's video or image set.

        The page is downloaded (following redirects) with a random Windows
        User-Agent, and the ``window.__INITIAL_STATE__`` payload embedded in
        the HTML is parsed to locate the current note.

        Args:
            share_url: The share link to resolve.

        Returns:
            A VideoInfo with the video URL (empty for image notes), the cover
            URL (empty when the note has no image list), the title, the list
            of images (only populated when there is no video), and the author.

        Raises:
            httpx.HTTPStatusError: If the page responds with an error status.
            ValueError: If the state payload cannot be found in the HTML, or
                the note id in the payload is ``"undefined"``.
            KeyError: If the payload lacks the expected note/user fields.
        """
        headers = {
            "User-Agent": fake_useragent.UserAgent(os=["windows"]).random,
        }
        async with httpx.AsyncClient(follow_redirects=True) as client:
            response = await client.get(share_url, headers=headers)
            response.raise_for_status()

        match = _INITIAL_STATE_RE.search(response.text)
        if not match or not match.group(1):
            raise ValueError("parse video json info from html fail")

        # NOTE(review): presumably YAML is used instead of json because the
        # payload is a JS object literal that may contain bare `undefined`,
        # which is not valid JSON -- confirm before swapping the parser.
        state = yaml.safe_load(match.group(1))
        note_id = state["note"]["currentNoteId"]

        # Share links expire; an expired link yields "undefined" as the note id.
        if note_id == "undefined":
            raise ValueError("parse fail: note id in response is undefined")

        data = state["note"]["noteDetailMap"][note_id]["note"]

        # Video address (empty string when the note is an image set).
        video_url = _first_master_url(
            data.get("video", {}).get("media", {}).get("stream", {}).get("h264", [])
        )

        # A missing or empty image list must not crash a video note.
        image_list = data.get("imageList") or []

        # Image-set addresses are only collected when there is no video.
        images = (
            [] if video_url else [_build_image_info(item) for item in image_list]
        )
        cover_url = image_list[0].get("urlDefault", "") if image_list else ""

        user = data["user"]
        return VideoInfo(
            video_url=video_url,
            cover_url=cover_url,
            title=data["title"],
            images=images,
            author=VideoAuthor(
                uid=user["userId"],
                name=user["nickname"],
                avatar=user["avatar"],
            ),
        )

    async def parse_video_id(self, video_id: str) -> VideoInfo:
        """Not supported: always raises NotImplementedError."""
        raise NotImplementedError("小红书暂不支持直接解析视频ID")

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yangbuyiya/yby6-crawling-short-video-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server