Skip to main content
Glama
Ryan7t
by Ryan7t

parse_douyin_link

Extract watermark-free videos and images from Douyin (TikTok) share links by automatically identifying content type and returning clean resources.

Instructions

解析抖音分享链接,自动识别视频或图文类型并返回无水印资源

参数:
- share_link: 抖音分享链接或包含链接的文本

返回:
- 包含资源链接和信息的JSON字符串
- 自动识别类型(video/image)并返回相应格式
- 调用完成后,请将结果整理为以下纯文本格式并反馈给用户(禁止使用Markdown):
  标题(如无则留空):
  文案:
  视频/图片链接:
- 返回时请保留完整的标题和文案,不要省略或截断任何内容
- 若专用解析失败,将自动尝试 generic 兜底逻辑;调用方需同样按上述格式反馈结果
- 抖音仅返回 caption 字段,标题需由调用方自行按需补充

Input Schema

Table | JSON Schema
Name | Required | Description | Default
share_link | Yes | — | —

Output Schema

Table | JSON Schema
Name | Required | Description | Default
result | Yes | — | —

Implementation Reference

  • The primary MCP tool handler for 'parse_douyin_link', decorated with @mcp.tool(). It processes Douyin share links, auto-detects video or image content via DouyinProcessor, extracts no-watermark resources, and uses generic fallback if parsing fails. Returns structured JSON.
    @mcp.tool()
    def parse_douyin_link(share_link: str) -> str:
        """
        解析抖音分享链接,自动识别视频或图文类型并返回无水印资源

        参数:
        - share_link: 抖音分享链接或包含链接的文本

        返回:
        - 包含资源链接和信息的JSON字符串
        - 自动识别类型(video/image)并返回相应格式
        - 调用完成后,请将结果整理为以下纯文本格式并反馈给用户(禁止使用Markdown):
          标题(如无则留空):
          文案:
          视频/图片链接:
        - 返回时请保留完整的标题和文案,不要省略或截断任何内容
        - 若专用解析失败,将自动尝试 generic 兜底逻辑;调用方需同样按上述格式反馈结果
        - 抖音仅返回 caption 字段,标题需由调用方自行按需补充
        """
        # NOTE: the docstring above is the MCP tool description shown to
        # clients; it must stay as-is (Chinese) for callers that rely on it.
        try:
            # No API key is needed just to resolve resources.
            processor = DouyinProcessor("")

            # First attempt: treat the link as a regular video.
            try:
                info = processor.parse_share_url(share_link)
            except Exception as primary_error:
                # parse_share_url raises a dedicated message when the link is
                # actually an image note; anything else is a real failure.
                if "这是图文笔记" not in str(primary_error):
                    return _generic_fallback(share_link, f"抖音视频解析失败: {primary_error}")
                try:
                    note = processor.parse_image_note(share_link)
                except Exception as note_error:
                    return _generic_fallback(share_link, f"抖音图文解析失败: {note_error}")
                # Image notes: only caption is emitted, mirroring the video path.
                image_payload = {
                    "status": "success",
                    "type": "image",
                    "platform": "douyin",
                    "note_id": note["note_id"],
                    "caption": note.get("caption", ""),
                    "image_count": len(note["images"]),
                    "images": note["images"],
                }
                return json.dumps(image_payload, ensure_ascii=False, indent=2)

            # Video path: emit only caption plus the no-watermark URL.
            video_payload = {
                "status": "success",
                "type": "video",
                "platform": "douyin",
                "video_id": info["video_id"],
                "caption": info.get("caption", ""),
                "url": info["url"],
            }
            return json.dumps(video_payload, ensure_ascii=False, indent=2)

        except Exception as e:
            return _generic_fallback(share_link, f"解析抖音链接失败: {e}")
  • DouyinProcessor.parse_share_url: Core logic to extract video ID from redirected share URL, fetch page HTML, parse embedded JSON (window._ROUTER_DATA), retrieve no-watermark video URL (playwm -> play), caption, and metadata.
    def parse_share_url(self, share_text: str) -> Dict[str, str]:
        """Resolve a Douyin share text to a watermark-free video URL.

        Args:
            share_text: A share link, or free text that contains one.

        Returns:
            Dict with keys ``url`` (no-watermark video), ``title``
            (filename-safe desc), ``caption`` (raw desc) and ``video_id``.

        Raises:
            ValueError: no link found, the page could not be parsed, the link
                is an image note, or no video info is present.
        """
        import time

        started = time.time()

        # Pull the first http(s) URL out of the share text.
        matched = re.findall(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
            share_text,
        )
        if not matched:
            raise ValueError("未找到有效的分享链接")

        short_url = matched[0]
        logger.debug(f"[抖音视频] 提取到的链接: {short_url}")

        # Follow the short link; the redirect target path ends in the video id.
        redirect_started = time.time()
        redirect_resp = requests.get(short_url, headers=HEADERS, timeout=10, allow_redirects=True)
        logger.debug(f"[抖音视频] 短链接重定向耗时: {time.time()-redirect_started:.2f}秒")

        video_id = redirect_resp.url.split("?")[0].strip("/").split("/")[-1]
        logger.debug(f"[抖音视频] 视频ID: {video_id}")

        # Fetch the canonical share page for that id.
        page_started = time.time()
        page_resp = requests.get(
            f'https://www.iesdouyin.com/share/video/{video_id}',
            headers=HEADERS,
            timeout=10,
        )
        page_resp.raise_for_status()
        logger.debug(f"[抖音视频] 页面请求耗时: {time.time()-page_started:.2f}秒")

        # The page embeds its state as `window._ROUTER_DATA = {...}</script>`.
        router_match = re.search(
            r"window\._ROUTER_DATA\s*=\s*(.*?)</script>",
            page_resp.text,
            flags=re.DOTALL,
        )
        if not router_match or not router_match.group(1):
            raise ValueError("从HTML中解析视频信息失败")

        json_data = json.loads(router_match.group(1).strip())

        # Video pages and note pages nest the same payload under different keys.
        loader_data = json_data["loaderData"]
        for page_key in ("video_(id)/page", "note_(id)/page"):
            if page_key in loader_data:
                original_video_info = loader_data[page_key]["videoInfoRes"]
                break
        else:
            raise Exception("无法从JSON中解析视频或图集信息")

        data = original_video_info["item_list"][0]

        # Image notes carry no downloadable video; the caller must switch APIs.
        if "images" in data and data["images"]:
            raise ValueError("这是图文笔记,请使用 parse_image_note 方法")

        if "video" not in data or not data.get("video"):
            raise ValueError("未找到视频信息")

        # Swapping "playwm" for "play" in the play address removes the watermark.
        video_url = data["video"]["play_addr"]["url_list"][0].replace("playwm", "play")
        raw_desc = data.get("desc", "").strip() or f"douyin_{video_id}"

        # Filename-safe variant of the caption (illegal path chars -> "_").
        safe_title = re.sub(r'[\\/:*?"<>|]', '_', raw_desc)

        logger.debug(f"[抖音视频] 解析完成,总耗时: {time.time() - started:.2f}秒")
        logger.debug(f"{'='*60}\n")

        return {
            "url": video_url,
            "title": safe_title,
            "caption": raw_desc,
            "video_id": video_id,
        }
  • DouyinProcessor.parse_image_note: Parses image notes (non-video posts), extracts note ID, caption, and list of no-watermark image URLs with dimensions from page JSON.
    def parse_image_note(self, share_text: str) -> Dict[str, object]:
        """Extract a Douyin image note: its no-watermark images plus metadata.

        Fixes vs. previous revision: the return annotation used the builtin
        function ``any`` as a type (now ``object``), and the documented return
        shape omitted the ``caption`` key that is actually returned.

        Args:
            share_text: A share link, or free text that contains one.

        Returns:
            {
                "note_id": str,
                "title": str,     # filename-safe desc
                "desc": str,      # raw desc
                "caption": str,   # same value as desc, mirrors the video API
                "type": "image",
                "images": [
                    {
                        "url": str,       # no-watermark image URL
                        "width": int,     # may be None if absent in payload
                        "height": int     # may be None if absent in payload
                    },
                    ...
                ]
            }

        Raises:
            ValueError: no link found, page unparseable, the link is not an
                image note, no images present, or no image URL extractable.
        """
        import time

        start_time = time.time()

        # Pull the first http(s) URL out of the share text.
        urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', share_text)
        if not urls:
            raise ValueError("未找到有效的分享链接")

        share_url = urls[0]
        logger.debug(f"[抖音图文] 开始处理短链接: {share_url}")

        # First request: follow the short-link redirect. Its response body
        # already contains the note page, so no second fetch is needed.
        t1 = time.time()
        share_response = requests.get(share_url, headers=HEADERS, timeout=10, allow_redirects=True)
        logger.debug(f"[抖音图文] 短链接重定向耗时: {time.time()-t1:.2f}秒")

        note_id = share_response.url.split("?")[0].strip("/").split("/")[-1]
        logger.debug(f"[抖音图文] Note ID: {note_id}")

        # Reuse the redirect response instead of re-requesting the page.
        response = share_response
        response.raise_for_status()

        # The page embeds its state as `window._ROUTER_DATA = {...}</script>`.
        pattern = re.compile(
            pattern=r"window\._ROUTER_DATA\s*=\s*(.*?)</script>",
            flags=re.DOTALL,
        )
        find_res = pattern.search(response.text)

        if not find_res or not find_res.group(1):
            raise ValueError("从HTML中解析图文信息失败")

        json_data = json.loads(find_res.group(1).strip())
        NOTE_ID_PAGE_KEY = "note_(id)/page"

        if NOTE_ID_PAGE_KEY not in json_data["loaderData"]:
            raise ValueError("该链接不是图文笔记")

        original_note_info = json_data["loaderData"][NOTE_ID_PAGE_KEY]["videoInfoRes"]
        data = original_note_info["item_list"][0]

        if "images" not in data or not data["images"]:
            raise ValueError("该笔记中没有找到图片")

        # url_list entries are the no-watermark variants; take the first one.
        images = []
        for img in data["images"]:
            if "url_list" in img and img["url_list"]:
                images.append({
                    "url": img["url_list"][0],
                    "width": img.get("width"),
                    "height": img.get("height")
                })

        if not images:
            raise ValueError("无法提取图片URL")

        # Image notes have no separate title field; desc doubles as the title.
        desc = data.get("desc", "").strip() or f"douyin_{note_id}"
        # Filename-safe variant of the caption (illegal path chars -> "_").
        title = re.sub(r'[\\/:*?"<>|]', '_', desc)

        total_time = time.time() - start_time
        logger.debug(f"[抖音图文] 解析完成,总耗时: {total_time:.2f}秒,图片数量: {len(images)}")
        logger.debug(f"{'='*60}\n")

        return {
            "note_id": note_id,
            "title": title,
            "desc": desc,
            "caption": desc,
            "type": "image",
            "images": images
        }
  • _generic_fallback: Universal fallback function called when platform-specific parsing fails, invokes extract_generic_media and formats error/response.
    def _generic_fallback(share_link: str, reason: str) -> str:
        """通用兜底逻辑:在专用解析失败时尝试通用提取。"""
    
        try:
            fallback_data = extract_generic_media(share_link)
            fallback_data.setdefault("fallback_reason", reason)
            return json.dumps(fallback_data, ensure_ascii=False, indent=2)
        except Exception as fallback_error:
            return json.dumps({
                "status": "error",
                "error": f"{reason};兜底解析失败:{fallback_error}"
            }, ensure_ascii=False, indent=2)
  • @mcp.tool() decorator registers the parse_douyin_link function as an MCP tool.
    @mcp.tool()

Tool Definition Quality

Score is being calculated. Check back soon.

Install Server

Other Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Ryan7t/wanyi-watermark'

If you have feedback or need assistance with the MCP directory API, please join our Discord server