Skip to main content
Glama

Short Video MCP Server

by yangbuyiya
MIT License
38
douyin.py (4.58 kB)
import json
import re

import httpx

from .base import BaseParser, ImgInfo, VideoAuthor, VideoInfo

# Request headers that imitate a mobile browser.
header = {
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) EdgiOS/121.0.2277.107 Version/17.0 Mobile/15E148 Safari/604.1'
}

# Finds http(s) URLs embedded in free-form share text. The pattern contains
# only non-capturing groups, so a match's group(0) is the whole URL.
_SHARE_URL_RE = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)

# Captures the JSON payload assigned to window._ROUTER_DATA in the detail page.
_ROUTER_DATA_RE = re.compile(
    pattern=r"window\._ROUTER_DATA\s*=\s*(.*?)</script>",
    flags=re.DOTALL,
)

# Keys under "loaderData" that hold the post details: the first is used for
# video posts, the second for image-gallery ("note") posts.
_VIDEO_ID_PAGE_KEY = "video_(id)/page"
_NOTE_ID_PAGE_KEY = "note_(id)/page"


class DouYin(BaseParser):
    """Parser for Douyin / Douyin Huoshan edition share links."""

    async def parse_share_url(self, share_text: str) -> VideoInfo:
        """Extract the video (or image gallery) details from share text.

        Args:
            share_text: Free-form text containing a share URL; the first URL
                found is used.

        Returns:
            A ``VideoInfo`` describing the post. For image-gallery posts
            ``video_url`` is an empty string and ``images`` is populated.

        Raises:
            ValueError: If no URL is found in ``share_text``, the detail page
                does not contain the expected data, or the post list is empty.
            httpx.HTTPStatusError: If the detail page request fails.
        """
        url_match = _SHARE_URL_RE.search(share_text)
        if url_match is None:
            raise ValueError("未找到有效的分享链接")
        share_url = url_match.group(0)

        async with httpx.AsyncClient(follow_redirects=True) as client:
            # Follow the share link's redirects; the video id is the last
            # path segment of the final URL (query string dropped).
            share_response = await client.get(share_url, headers=header)
            video_id = (
                str(share_response.url).split("?")[0].strip("/").split("/")[-1]
            )

            detail_url = f'https://www.iesdouyin.com/share/video/{video_id}'
            response = await client.get(detail_url, headers=header)
            response.raise_for_status()

        find_res = _ROUTER_DATA_RE.search(response.text)
        if not find_res or not find_res.group(1):
            raise ValueError("从HTML中解析视频信息失败")

        json_data = json.loads(find_res.group(1).strip())
        loader_data = json_data["loaderData"]

        # The payload is keyed differently for videos and image galleries;
        # take whichever is present (video first) and fail if neither is.
        for page_key in (_VIDEO_ID_PAGE_KEY, _NOTE_ID_PAGE_KEY):
            if page_key in loader_data:
                original_video_info = loader_data[page_key]["videoInfoRes"]
                break
        else:
            raise ValueError("无法从JSON中解析视频或图集信息")

        # An empty list would otherwise surface as a bare IndexError.
        item_list = original_video_info["item_list"]
        if not item_list:
            raise ValueError("未找到视频或图集信息,作品可能已被删除或不可见")
        data = item_list[0]

        images = self._collect_images(data)

        if images:
            # Image galleries carry no playable video, so the address is left
            # empty and the play address is not read at all.
            video_url = ""
        else:
            # Direct play link; "playwm" is swapped for "play" (presumably
            # the watermarked vs. watermark-free endpoint).
            video_url = data["video"]["play_addr"]["url_list"][0].replace(
                "playwm", "play"
            )

        return VideoInfo(
            video_url=video_url,
            cover_url=data["video"]["cover"]["url_list"][0],
            # Fall back to a synthetic title when the description is blank.
            title=data.get("desc", "").strip() or f"douyin_{video_id}",
            images=images,
            author=VideoAuthor(
                uid=data["author"]["sec_uid"],
                name=data["author"]["nickname"],
                avatar=data["author"]["avatar_thumb"]["url_list"][0],
            ),
        )

    @staticmethod
    def _collect_images(data) -> list:
        """Return an ``ImgInfo`` for each gallery image with a usable URL.

        Only the first entry of each image's ``url_list`` is used; images
        whose ``url_list`` is missing, not a list, empty, or starts with an
        empty value are skipped. Returns an empty list when ``data`` has no
        ``images`` list.
        """
        raw_images = data["images"] if "images" in data else None
        if not isinstance(raw_images, list):
            return []

        images = []
        for img in raw_images:
            if (
                "url_list" in img
                and isinstance(img["url_list"], list)
                and len(img["url_list"]) > 0
                and len(img["url_list"][0]) > 0
            ):
                images.append(ImgInfo(url=img["url_list"][0]))
        return images

    async def get_video_redirect_url(self, video_url: str) -> str:
        """Return the address that ``video_url`` redirects to.

        The request is sent without following redirects and the ``location``
        response header is returned. When that header is absent or empty the
        original ``video_url`` is returned unchanged (per the original author,
        Xigua videos on Douyin come back with an empty redirect address).
        """
        async with httpx.AsyncClient(follow_redirects=False) as client:
            response = await client.get(video_url, headers=header)
        return response.headers.get("location") or video_url

    async def parse_video_id(self, video_id: str) -> VideoInfo:
        """Parse a post by id by building its share URL and delegating."""
        req_url = self._get_request_url_by_video_id(video_id)
        return await self.parse_share_url(req_url)

    def _get_request_url_by_video_id(self, video_id: str) -> str:
        """Build the iesdouyin.com share URL for ``video_id``."""
        return f"https://www.iesdouyin.com/share/video/{video_id}/"

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yangbuyiya/yby6-crawling-short-video-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.