Skip to main content
Glama
Ryan7t
by Ryan7t

parse_xhs_link

Extracts watermark-free videos and images from Xiaohongshu (RedNote) share links by parsing URLs and returning clean resource data in structured format.

Instructions

解析小红书分享链接,自动识别视频或图文类型并返回无水印资源

参数:
- share_link: 小红书分享链接或包含链接的文本

返回:
- 包含资源链接和信息的JSON字符串
- 自动识别类型(video/image)并返回相应格式
- 调用完成后,请将结果整理为以下纯文本格式并反馈给用户(禁止使用Markdown):
  标题(如无则留空):
  文案:
  视频/图片链接:
- 返回时请保留完整的标题和文案,不要省略或截断任何内容
- 若专用解析失败,将自动尝试 generic 兜底逻辑;调用方需同样按上述格式反馈结果
- 抖音仅返回 caption 字段,标题需由调用方自行按需补充

Input Schema

Table / JSON Schema
Name | Required | Description | Default
share_link | Yes

Output Schema

Table / JSON Schema
Name | Required | Description | Default
result | Yes

Implementation Reference

  • Primary handler function for the 'parse_xhs_link' tool. Decorated with @mcp.tool() for registration. Orchestrates video/image detection, calls XiaohongshuProcessor methods, handles errors and generic fallback.
    @mcp.tool()
    def parse_xhs_link(share_link: str) -> str:
        """
        解析小红书分享链接,自动识别视频或图文类型并返回无水印资源
    
        参数:
        - share_link: 小红书分享链接或包含链接的文本
    
        返回:
        - 包含资源链接和信息的JSON字符串
        - 自动识别类型(video/image)并返回相应格式
        - 调用完成后,请将结果整理为以下纯文本格式并反馈给用户(禁止使用Markdown):
          标题(如无则留空):
          文案:
          视频/图片链接:
        - 返回时请保留完整的标题和文案,不要省略或截断任何内容
        - 若专用解析失败,将自动尝试 generic 兜底逻辑;调用方需同样按上述格式反馈结果
        - 抖音仅返回 caption 字段,标题需由调用方自行按需补充
        """
        # NOTE: the (Chinese) docstring above doubles as the MCP tool
        # description exposed to clients, so it is kept verbatim.
        try:
            worker = XiaohongshuProcessor()

            # First attempt: treat the link as a video note.
            try:
                video = worker.parse_share_url(share_link)
                payload = {
                    "status": "success",
                    "type": "video",
                    "platform": "xiaohongshu",
                    "note_id": video.get("note_id", ""),
                    "title": video["title"],
                    "caption": video.get("desc", ""),
                    "url": video["url"],
                    "description": f"视频标题: {video['title']}",
                }
                return json.dumps(payload, ensure_ascii=False, indent=2)
            except Exception as video_error:
                # Video parsing failed. If the error message suggests the
                # note has no video (i.e. it is an image note), retry with
                # the image parser; otherwise fall straight through to the
                # generic fallback.
                msg = str(video_error).lower()
                looks_like_image_note = (
                    "未从页面中发现可用视频直链" in msg
                    or "video" in msg
                    or "候选" in msg
                )
                if looks_like_image_note:
                    try:
                        note = worker.parse_image_note(share_link)
                        payload = {
                            "status": "success",
                            "type": "image",
                            "platform": "xiaohongshu",
                            "note_id": note["note_id"],
                            "title": note["title"],
                            "desc": note["desc"],
                            "caption": note.get("desc", ""),
                            "image_count": len(note["images"]),
                            "images": note["images"],
                            "format_info": {
                                "webp": "轻量格式,体积小(约160KB),适合快速预览和节省带宽",
                                "png": "无损格式,高质量(约1.8MB),支持透明背景,适合编辑和打印"
                            },
                        }
                        return json.dumps(payload, ensure_ascii=False, indent=2)
                    except Exception as image_error:
                        return _generic_fallback(share_link, f"小红书图文解析失败: {image_error}")
                return _generic_fallback(share_link, f"小红书视频解析失败: {video_error}")

        except Exception as e:
            # Only reached if processor construction (or the fallback
            # itself) blows up -- the inner handlers catch everything else.
            return _generic_fallback(share_link, f"解析小红书链接失败: {e}")
  • Core helper method in XiaohongshuProcessor class that parses video notes: fetches page, extracts and selects no-watermark video URL.
    def parse_share_url(self, share_text: str) -> Dict[str, str]:
        """Parse a Xiaohongshu share link into video info (url/title/note_id).

        Strategy:
        - scrape ``<video src>`` tags and the ``<meta og:video>`` tag from the HTML
        - pick the watermark-free direct link via heuristic scoring
        """
        import time

        started = time.time()

        url = self._extract_first_url(share_text)
        logger.debug(f"[小红书视频] 提取到的链接: {url}")

        note_id = self._extract_note_id_from_path(url)

        fetch_started = time.time()
        html = self._fetch_html(url)
        logger.debug(f"[小红书视频] 页面请求耗时: {time.time()-fetch_started:.2f}秒")

        # Title: prefer og:title, then og:description, then a synthetic name.
        raw_title = (
            self._extract_meta(html, "og:title")
            or self._extract_meta(html, "og:description", key="content")
            or (f"xhs_{note_id}" if note_id else "xhs")
        )
        # Strip characters that are illegal in file names.
        title = re.sub(r"[\\/:*?\"<>|]", "_", raw_title).strip()

        # Collect candidate direct links, tagged with their origin.
        # 1) <video> tags
        candidates: List[Tuple[str, str]] = [
            (src, "video") for src in self._extract_all_video_src(html)
        ]
        # 2) og:video meta tag
        og_video = self._extract_meta(html, "og:video")
        if og_video:
            candidates.append((og_video, "og"))

        if not candidates:
            # Last resort: scan the raw HTML for xhscdn.com-hosted .mp4 URLs.
            for match in re.finditer(r"https?://[^\s'\"]+?\.mp4", html, re.IGNORECASE):
                if "xhscdn.com" in match.group(0):
                    candidates.append((match.group(0), "fallback"))

        logger.debug(f"[小红书视频] 找到 {len(candidates)} 个候选视频URL")

        pick_started = time.time()
        best_url = self.get_watermark_free_url(candidates)
        logger.debug(f"[小红书视频] URL筛选耗时: {time.time()-pick_started:.2f}秒")

        logger.debug(f"[小红书视频] 解析完成,总耗时: {time.time()-started:.2f}秒")
        logger.debug(f"{'='*60}\n")

        return {
            "url": best_url,
            "title": title,
            "note_id": note_id or "",
        }
  • Core helper method in XiaohongshuProcessor class that parses image notes: extracts metadata and dual-format (WebP/PNG) image URLs.
    def parse_image_note(self, share_text: str) -> Dict[str, any]:
        """Parse a Xiaohongshu image note and return its images and metadata.

        Args:
            share_text: a share link, or free text containing one.

        Return format:
        {
            "note_id": str,
            "title": str,
            "desc": str,
            "type": "image",
            "images": [
                {
                    "url_webp": str,  # WebP variant (small, preview-friendly)
                    "url_png": str,   # PNG variant (lossless, supports transparency)
                    "width": int,
                    "height": int
                },
                ...
            ]
        }

        Raises:
            ValueError: when the note id cannot be extracted from the URL,
                the page data cannot be parsed, the note contains no images,
                or navigating the note payload fails.

        NOTE(review): the return annotation uses the builtin ``any`` rather
        than ``typing.Any`` -- harmless at runtime but misleading; consider
        fixing once ``Any`` is confirmed to be imported at module level.
        """
        import time
        start_time = time.time()
    
        share_url = self._extract_first_url(share_text)
        logger.debug(f"[小红书图文] 提取到的链接: {share_url}")
    
        # Fetch the page first to resolve the short link's redirect chain;
        # the note id is only present in the final (redirected) URL.
        t1 = time.time()
        resp = requests.get(share_url, headers=HEADERS_XHS_PC, timeout=self.timeout, allow_redirects=True)
        logger.debug(f"[小红书图文] 页面请求耗时: {time.time()-t1:.2f}秒")
    
        final_url = resp.url
        html = resp.text
    
        # Extract note_id from the real URL (both /explore/ and
        # /discovery/item/ path shapes are supported).
        note_match = re.search(r'/(?:explore|discovery/item)/([a-z0-9]+)', final_url)
        if not note_match:
            raise ValueError("无法从链接中提取笔记 ID")
    
        note_id = note_match.group(1)
        logger.debug(f"[小红书图文] Note ID: {note_id}")
    
        # Pull the embedded __INITIAL_STATE__ JSON payload out of the page.
        t2 = time.time()
        data = self._extract_initial_state(html)
        if not data:
            raise ValueError("无法从页面中提取笔记数据")
        logger.debug(f"[小红书图文] JSON解析耗时: {time.time()-t2:.2f}秒")
    
        # Navigate to the note detail inside the payload. Any KeyError /
        # TypeError along the way is converted to ValueError at the bottom.
        try:
            note_map = data['note']['noteDetailMap']
            if note_id not in note_map:
                raise KeyError(f"笔记 {note_id} 不在数据中")
    
            note_info = note_map[note_id]['note']
    
            # Extract the image list; an image note without images is an error.
            image_list = note_info.get('imageList', [])
            if not image_list:
                raise ValueError("笔记中没有找到图片")
    
            # Build per-image URL pairs (WebP for preview, PNG for quality).
            t3 = time.time()
            images = []
            for img in image_list:
                # Prefer the WB_DFT entry from infoList when available.
                webp_url = None
                if 'infoList' in img:
                    for info in img['infoList']:
                        if info.get('imageScene') == 'WB_DFT':
                            webp_url = info.get('url')
                            break
    
                # Fall back to urlDefault.
                if not webp_url:
                    webp_url = img.get('urlDefault')
    
                if webp_url:
                    # Force HTTPS to avoid mixed-content issues in clients.
                    webp_url = self._ensure_https(webp_url)
    
                    # Derive a PNG URL (logic adapted from a userscript).
                    png_url = self._convert_image_url_to_png(webp_url)
    
                    # Keep both formats; if PNG conversion failed, reuse WebP.
                    images.append({
                        'url_webp': webp_url,  # WebP (small, preview) - HTTPS
                        'url_png': png_url if png_url else webp_url,  # PNG (lossless) - HTTPS
                        'width': img.get('width'),
                        'height': img.get('height')
                    })
    
            # Strip characters that are illegal in file names from the title.
            title = note_info.get('title', f'xhs_{note_id}')
            title = re.sub(r"[\\/:*?\"<>|]", "_", title).strip()
    
            logger.debug(f"[小红书图文] 图片URL处理耗时: {time.time()-t3:.2f}秒")
    
            total_time = time.time() - start_time
            logger.debug(f"[小红书图文] 解析完成,总耗时: {total_time:.2f}秒,图片数量: {len(images)}")
            logger.debug(f"{'='*60}\n")
    
            return {
                'note_id': note_id,
                'title': title,
                'desc': note_info.get('desc', ''),
                'type': 'image',
                'images': images
            }
    
        except (KeyError, TypeError) as e:
            # Normalize payload-shape errors into the ValueError contract.
            raise ValueError(f"解析笔记数据失败: {str(e)}")
  • Docstring providing input parameter description, output format specification, and usage instructions serving as the tool schema.
    """
    解析小红书分享链接,自动识别视频或图文类型并返回无水印资源
    
    参数:
    - share_link: 小红书分享链接或包含链接的文本
    
    返回:
    - 包含资源链接和信息的JSON字符串
    - 自动识别类型(video/image)并返回相应格式
    - 调用完成后,请将结果整理为以下纯文本格式并反馈给用户(禁止使用Markdown):
      标题(如无则留空):
      文案:
      视频/图片链接:
    - 返回时请保留完整的标题和文案,不要省略或截断任何内容
    - 若专用解析失败,将自动尝试 generic 兜底逻辑;调用方需同样按上述格式反馈结果
    - 抖音仅返回 caption 字段,标题需由调用方自行按需补充
    """
  • @mcp.tool() decorator registers the parse_xhs_link function as an MCP tool, automatically generating schema from signature and docstring.
    @mcp.tool()

Tool Definition Quality

Score is being calculated. Check back soon.

Install Server

Other Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Ryan7t/wanyi-watermark'

If you have feedback or need assistance with the MCP directory API, please join our Discord server