
parse_douyin_link

Extract watermark-free videos and images from Douyin (TikTok) share links by automatically identifying content type and returning clean resources.

Instructions

Parse a Douyin share link, automatically detect whether it is a video or an image post, and return watermark-free resources.

Parameters:
- share_link: a Douyin share link, or text that contains one

Returns:
- A JSON string containing the resource links and related information
- The content type (video/image) is detected automatically and returned in the matching format
- After the call completes, present the result to the user in the following plain-text format (no Markdown):
  Title (leave empty if none):
  Caption:
  Video/image link:
- Keep the title and caption complete; do not omit or truncate anything
- If the dedicated parser fails, a generic fallback is attempted automatically; the caller should report that result in the same format
- Douyin returns only the caption field; the caller should supply a title as needed

Input Schema

Name        Required  Description  Default
share_link  Yes
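
The JSON Schema view of the input is not reproduced on this page. Assuming the server relies on automatic schema generation from the `share_link: str` signature, it would look roughly like this sketch:

    # Approximate input schema (sketch derived from the tool signature,
    # not copied from the server's actual schema output).
    INPUT_SCHEMA = {
        "type": "object",
        "properties": {
            "share_link": {"type": "string"},
        },
        "required": ["share_link"],
    }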

Implementation Reference

  • The primary MCP tool handler for 'parse_douyin_link', decorated with @mcp.tool(). It processes Douyin share links, auto-detects video or image content via DouyinProcessor, extracts no-watermark resources, and uses generic fallback if parsing fails. Returns structured JSON.
    @mcp.tool()
    def parse_douyin_link(share_link: str) -> str:
        """Parse a Douyin share link, automatically detect whether it is a video or an image post, and return watermark-free resources.

        Parameters:
        - share_link: a Douyin share link, or text that contains one

        Returns:
        - A JSON string containing the resource links and related information
        - The content type (video/image) is detected automatically and returned in the matching format
        - After the call completes, present the result to the user in the following plain-text format (no Markdown):
          Title (leave empty if none):
          Caption:
          Video/image link:
        - Keep the title and caption complete; do not omit or truncate anything
        - If the dedicated parser fails, a generic fallback is attempted automatically; the caller should report that result in the same format
        - Douyin returns only the caption field; the caller should supply a title as needed
        """
        try:
            processor = DouyinProcessor("")  # no API key is needed to fetch resources
            # Try parsing as a video first
            try:
                video_info = processor.parse_share_url(share_link)
                # Output only caption and the resource URL; the frontend confirmed no title field is needed
                return json.dumps({
                    "status": "success",
                    "type": "video",
                    "platform": "douyin",
                    "video_id": video_info["video_id"],
                    "caption": video_info.get("caption", ""),
                    "url": video_info["url"]
                }, ensure_ascii=False, indent=2)
            except Exception as video_error:
                # If video parsing failed, check whether this is an image note
                error_msg = str(video_error)
                if "这是图文笔记" in error_msg:  # marker raised by parse_share_url for image notes
                    try:
                        note_data = processor.parse_image_note(share_link)
                        # Image notes likewise keep only caption, avoiding duplicate fields
                        return json.dumps({
                            "status": "success",
                            "type": "image",
                            "platform": "douyin",
                            "note_id": note_data["note_id"],
                            "caption": note_data.get("caption", ""),
                            "image_count": len(note_data["images"]),
                            "images": note_data["images"]
                        }, ensure_ascii=False, indent=2)
                    except Exception as image_error:
                        return _generic_fallback(share_link, f"抖音图文解析失败: {image_error}")
                return _generic_fallback(share_link, f"抖音视频解析失败: {video_error}")
        except Exception as e:
            return _generic_fallback(share_link, f"解析抖音链接失败: {e}")
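
    A minimal usage sketch, assuming the handler is called directly in-process rather than over MCP; the share link is a placeholder, not a real Douyin URL:

    import json

    # Hypothetical direct call; in production the function is invoked as an MCP tool.
    result = json.loads(parse_douyin_link("https://v.douyin.com/XXXXXXX/"))  # placeholder link
    if result.get("status") == "success" and result["type"] == "video":
        print(result["caption"])  # full caption text
        print(result["url"])      # watermark-free video URL
    elif result.get("status") == "success" and result["type"] == "image":
        for img in result["images"]:
            print(img["url"])     # watermark-free image URLs
    else:
        print(result.get("error", result))  # error payload from the generic fallback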
  • DouyinProcessor.parse_share_url: Core logic to extract video ID from redirected share URL, fetch page HTML, parse embedded JSON (window._ROUTER_DATA), retrieve no-watermark video URL (playwm -> play), caption, and metadata.
    def parse_share_url(self, share_text: str) -> Dict[str, str]:
        """Extract the watermark-free video URL from the share text."""
        import time
        start_time = time.time()

        # Extract the share link from the text
        urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', share_text)
        if not urls:
            raise ValueError("未找到有效的分享链接")
        share_url = urls[0]
        logger.debug(f"[抖音视频] 提取到的链接: {share_url}")

        t1 = time.time()
        share_response = requests.get(share_url, headers=HEADERS, timeout=10, allow_redirects=True)
        logger.debug(f"[抖音视频] 短链接重定向耗时: {time.time()-t1:.2f}秒")
        video_id = share_response.url.split("?")[0].strip("/").split("/")[-1]
        logger.debug(f"[抖音视频] 视频ID: {video_id}")
        share_url = f'https://www.iesdouyin.com/share/video/{video_id}'

        # Fetch the video page content
        t2 = time.time()
        response = requests.get(share_url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        logger.debug(f"[抖音视频] 页面请求耗时: {time.time()-t2:.2f}秒")

        pattern = re.compile(
            pattern=r"window\._ROUTER_DATA\s*=\s*(.*?)</script>",
            flags=re.DOTALL,
        )
        find_res = pattern.search(response.text)
        if not find_res or not find_res.group(1):
            raise ValueError("从HTML中解析视频信息失败")

        # Parse the embedded JSON data
        json_data = json.loads(find_res.group(1).strip())
        VIDEO_ID_PAGE_KEY = "video_(id)/page"
        NOTE_ID_PAGE_KEY = "note_(id)/page"
        if VIDEO_ID_PAGE_KEY in json_data["loaderData"]:
            original_video_info = json_data["loaderData"][VIDEO_ID_PAGE_KEY]["videoInfoRes"]
        elif NOTE_ID_PAGE_KEY in json_data["loaderData"]:
            original_video_info = json_data["loaderData"][NOTE_ID_PAGE_KEY]["videoInfoRes"]
        else:
            raise Exception("无法从JSON中解析视频或图集信息")
        data = original_video_info["item_list"][0]

        # Check whether this is an image note
        if "images" in data and data["images"]:
            raise ValueError("这是图文笔记,请使用 parse_image_note 方法")

        # Check whether a video is present
        if "video" not in data or not data.get("video"):
            raise ValueError("未找到视频信息")

        # Get the video URL (remove the watermark: playwm -> play)
        video_url = data["video"]["play_addr"]["url_list"][0].replace("playwm", "play")

        raw_desc = data.get("desc", "").strip()
        if not raw_desc:
            raw_desc = f"douyin_{video_id}"
        # Replace characters that are illegal in filenames; used only for file naming
        safe_title = re.sub(r'[\\/:*?"<>|]', '_', raw_desc)

        total_time = time.time() - start_time
        logger.debug(f"[抖音视频] 解析完成,总耗时: {total_time:.2f}秒")
        logger.debug(f"{'='*60}\n")

        return {
            "url": video_url,
            "title": safe_title,
            "caption": raw_desc,
            "video_id": video_id
        }
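
    For illustration, the processor can also be exercised on its own; a sketch under the assumption that DouyinProcessor is importable from the server module (the share text is a placeholder):

    # Hypothetical standalone use of DouyinProcessor.parse_share_url.
    processor = DouyinProcessor("")  # no API key needed for resource extraction
    info = processor.parse_share_url("Check this out: https://v.douyin.com/XXXXXXX/")  # placeholder
    print(info["video_id"])  # ID taken from the redirected share URL
    print(info["caption"])   # original description text (falls back to douyin_<id> when empty)
    print(info["url"])       # play URL with "playwm" already replaced by "play"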
  • DouyinProcessor.parse_image_note: Parses image notes (non-video posts), extracts note ID, caption, and list of no-watermark image URLs with dimensions from page JSON.
    def parse_image_note(self, share_text: str) -> Dict[str, any]:
        """Extract a Douyin image note from the share text, returning the image list and note info.

        Return format:
        {
            "note_id": str,
            "title": str,
            "desc": str,
            "type": "image",
            "images": [
                {
                    "url": str,    # watermark-free image URL
                    "width": int,
                    "height": int
                },
                ...
            ]
        }
        """
        import time
        start_time = time.time()

        # Extract the share link from the text
        urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', share_text)
        if not urls:
            raise ValueError("未找到有效的分享链接")
        share_url = urls[0]
        logger.debug(f"[抖音图文] 开始处理短链接: {share_url}")

        # First request: follow the short-link redirect
        t1 = time.time()
        share_response = requests.get(share_url, headers=HEADERS, timeout=10, allow_redirects=True)
        logger.debug(f"[抖音图文] 短链接重定向耗时: {time.time()-t1:.2f}秒")
        note_id = share_response.url.split("?")[0].strip("/").split("/")[-1]
        logger.debug(f"[抖音图文] Note ID: {note_id}")

        # Second request: fetch the page content (the first request already returned it)
        # response = requests.get(share_response.url, headers=HEADERS, timeout=10)
        # Optimization: reuse the first response to avoid a duplicate request
        response = share_response
        response.raise_for_status()

        pattern = re.compile(
            pattern=r"window\._ROUTER_DATA\s*=\s*(.*?)</script>",
            flags=re.DOTALL,
        )
        find_res = pattern.search(response.text)
        if not find_res or not find_res.group(1):
            raise ValueError("从HTML中解析图文信息失败")

        # Parse the embedded JSON data
        json_data = json.loads(find_res.group(1).strip())
        NOTE_ID_PAGE_KEY = "note_(id)/page"
        if NOTE_ID_PAGE_KEY not in json_data["loaderData"]:
            raise ValueError("该链接不是图文笔记")
        original_note_info = json_data["loaderData"][NOTE_ID_PAGE_KEY]["videoInfoRes"]
        data = original_note_info["item_list"][0]

        # Check whether images are present
        if "images" not in data or not data["images"]:
            raise ValueError("该笔记中没有找到图片")

        # Extract the image list (url_list yields watermark-free images)
        images = []
        for img in data["images"]:
            if "url_list" in img and img["url_list"]:
                images.append({
                    "url": img["url_list"][0],  # use the first URL (watermark-free version)
                    "width": img.get("width"),
                    "height": img.get("height")
                })

        if not images:
            raise ValueError("无法提取图片URL")

        # Get the title (Douyin image notes have no separate description field; desc is the title)
        desc = data.get("desc", "").strip() or f"douyin_{note_id}"
        # Replace characters that are illegal in filenames; used only for file naming
        title = re.sub(r'[\\/:*?"<>|]', '_', desc)

        total_time = time.time() - start_time
        logger.debug(f"[抖音图文] 解析完成,总耗时: {total_time:.2f}秒,图片数量: {len(images)}")
        logger.debug(f"{'='*60}\n")

        return {
            "note_id": note_id,
            "title": title,
            "desc": desc,
            "caption": desc,
            "type": "image",
            "images": images
        }
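
    A hedged sketch of consuming the returned image list, reusing the module's requests and HEADERS; the share link is a placeholder and the .jpg extension is an assumption about the image format:

    # Hypothetical example: save every watermark-free image from an image note.
    note = DouyinProcessor("").parse_image_note("https://v.douyin.com/XXXXXXX/")  # placeholder link
    for i, img in enumerate(note["images"]):
        resp = requests.get(img["url"], headers=HEADERS, timeout=10)
        resp.raise_for_status()
        with open(f"{note['title']}_{i}.jpg", "wb") as f:  # title is already filename-safe
            f.write(resp.content)  # .jpg extension assumed; actual format may differ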
  • _generic_fallback: Universal fallback function called when platform-specific parsing fails, invokes extract_generic_media and formats error/response.
    def _generic_fallback(share_link: str, reason: str) -> str:
        """Generic fallback: attempt universal extraction when the dedicated parser fails."""
        try:
            fallback_data = extract_generic_media(share_link)
            fallback_data.setdefault("fallback_reason", reason)
            return json.dumps(fallback_data, ensure_ascii=False, indent=2)
        except Exception as fallback_error:
            return json.dumps({
                "status": "error",
                "error": f"{reason};兜底解析失败:{fallback_error}"
            }, ensure_ascii=False, indent=2)
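
    When extract_generic_media succeeds, its payload is returned with a fallback_reason field added; when it also fails, the tool returns an error payload of roughly the following shape (illustrative, based on the code above; the angle-bracket parts stand in for the real exception messages):

    import json

    # Illustrative only: the shape of the error JSON produced by the except branch above.
    example_error = {
        "status": "error",
        "error": "抖音视频解析失败: <original exception>;兜底解析失败:<fallback exception>",
    }
    print(json.dumps(example_error, ensure_ascii=False, indent=2))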
  • @mcp.tool() decorator registers the parse_douyin_link function as an MCP tool.
    @mcp.tool()
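
    For context, wiring such a tool into a server typically looks like the sketch below, assuming the project uses FastMCP from the official MCP Python SDK; the server name and transport are assumptions, not taken from the repository:

    # Sketch of the surrounding server setup (assumed, not copied from the repo).
    from mcp.server.fastmcp import FastMCP

    mcp = FastMCP("wanyi-watermark")  # hypothetical server name

    @mcp.tool()
    def parse_douyin_link(share_link: str) -> str:
        ...  # handler body as shown in the Implementation Reference above

    if __name__ == "__main__":
        mcp.run()  # stdio transport by default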
