Skip to main content
Glama
Ryan7t
by Ryan7t

parse_xhs_link

Extracts watermark-free videos and images from Xiaohongshu (RedNote) share links by parsing URLs and returning clean resource data in structured format.

Instructions

解析小红书分享链接,自动识别视频或图文类型并返回无水印资源

参数:
- share_link: 小红书分享链接或包含链接的文本

返回:
- 包含资源链接和信息的JSON字符串
- 自动识别类型(video/image)并返回相应格式
- 调用完成后,请将结果整理为以下纯文本格式并反馈给用户(禁止使用Markdown):
  标题(如无则留空):
  文案:
  视频/图片链接:
- 返回时请保留完整的标题和文案,不要省略或截断任何内容
- 若专用解析失败,将自动尝试 generic 兜底逻辑;调用方需同样按上述格式反馈结果
- 抖音仅返回 caption 字段,标题需由调用方自行按需补充

Input Schema

Table / JSON Schema
Name | Required | Description | Default
share_link | Yes | Xiaohongshu share link or text containing one | (none)

Implementation Reference

  • Primary handler function for the 'parse_xhs_link' tool. Decorated with @mcp.tool() for registration. Orchestrates video/image detection, calls XiaohongshuProcessor methods, handles errors and generic fallback.
    @mcp.tool()
    def parse_xhs_link(share_link: str) -> str:
        """Parse a Xiaohongshu share link; auto-detect video vs. image notes
        and return watermark-free resources.

        Args:
        - share_link: a Xiaohongshu share link, or any text containing one.

        Returns:
        - A JSON string containing the resource links and note information.
        - The note type (video/image) is detected automatically and the
          matching payload format is returned.
        - After the call, present the result to the user as plain text in
          the following layout (Markdown is forbidden):
          标题(如无则留空):
          文案:
          视频/图片链接:
        - Keep the complete title and caption; never omit or truncate them.
        - If the dedicated parser fails, a generic fallback is attempted
          automatically; present its result in the same layout.
        - Douyin returns only a caption field; the caller adds a title as
          needed.
        """
        try:
            proc = XiaohongshuProcessor()

            # First attempt: treat the note as a video.
            try:
                vinfo = proc.parse_share_url(share_link)
                payload = {
                    "status": "success",
                    "type": "video",
                    "platform": "xiaohongshu",
                    "note_id": vinfo.get("note_id", ""),
                    "title": vinfo["title"],
                    "caption": vinfo.get("desc", ""),
                    "url": vinfo["url"],
                    "description": f"视频标题: {vinfo['title']}",
                }
                return json.dumps(payload, ensure_ascii=False, indent=2)
            except Exception as video_error:
                # Video parsing failed -- retry as an image note only when
                # the error text carries one of the known markers.
                hint = str(video_error).lower()
                image_markers = ("未从页面中发现可用视频直链", "video", "候选")
                if any(marker in hint for marker in image_markers):
                    try:
                        note = proc.parse_image_note(share_link)
                        payload = {
                            "status": "success",
                            "type": "image",
                            "platform": "xiaohongshu",
                            "note_id": note["note_id"],
                            "title": note["title"],
                            "desc": note["desc"],
                            "caption": note.get("desc", ""),
                            "image_count": len(note["images"]),
                            "images": note["images"],
                            "format_info": {
                                "webp": "轻量格式,体积小(约160KB),适合快速预览和节省带宽",
                                "png": "无损格式,高质量(约1.8MB),支持透明背景,适合编辑和打印"
                            },
                        }
                        return json.dumps(payload, ensure_ascii=False, indent=2)
                    except Exception as image_error:
                        return _generic_fallback(share_link, f"小红书图文解析失败: {image_error}")
                return _generic_fallback(share_link, f"小红书视频解析失败: {video_error}")

        except Exception as e:
            return _generic_fallback(share_link, f"解析小红书链接失败: {e}")
  • Core helper method in XiaohongshuProcessor class that parses video notes: fetches page, extracts and selects no-watermark video URL.
    def parse_share_url(self, share_text: str) -> Dict[str, str]:
        """Parse a Xiaohongshu video share link.

        Returns a dict with the keys ``url`` / ``title`` / ``note_id``.

        Parsing strategy:
        - scrape <video src> tags and the <meta og:video> tag from the HTML
        - select the watermark-free direct link via heuristic scoring
        """
        import time
        began = time.time()

        url = self._extract_first_url(share_text)
        logger.debug(f"[小红书视频] 提取到的链接: {url}")

        note_id = self._extract_note_id_from_path(url)

        fetch_started = time.time()
        page = self._fetch_html(url)
        logger.debug(f"[小红书视频] 页面请求耗时: {time.time()-fetch_started:.2f}秒")

        # Title: prefer og:title, then og:description, then a synthetic name.
        raw_title = (
            self._extract_meta(page, "og:title")
            or self._extract_meta(page, "og:description", key="content")
            or (f"xhs_{note_id}" if note_id else "xhs")
        )
        # Strip characters that are illegal in file names.
        title = re.sub(r"[\\/:*?\"<>|]", "_", raw_title).strip()

        # Collect candidate direct links: <video> tags first, then og:video.
        pool: List[Tuple[str, str]] = [
            (src, "video") for src in self._extract_all_video_src(page)
        ]
        og_video = self._extract_meta(page, "og:video")
        if og_video:
            pool.append((og_video, "og"))

        if not pool:
            # Last resort: scan the whole page for .mp4 URLs on xhscdn.com.
            pool.extend(
                (m.group(0), "fallback")
                for m in re.finditer(r"https?://[^\s'\"]+?\.mp4", page, re.IGNORECASE)
                if "xhscdn.com" in m.group(0)
            )

        logger.debug(f"[小红书视频] 找到 {len(pool)} 个候选视频URL")

        pick_started = time.time()
        chosen = self.get_watermark_free_url(pool)
        logger.debug(f"[小红书视频] URL筛选耗时: {time.time()-pick_started:.2f}秒")

        total_time = time.time() - began
        logger.debug(f"[小红书视频] 解析完成,总耗时: {total_time:.2f}秒")
        logger.debug(f"{'='*60}\n")

        return {
            "url": chosen,
            "title": title,
            "note_id": note_id or "",
        }
  • Core helper method in XiaohongshuProcessor class that parses image notes: extracts metadata and dual-format (WebP/PNG) image URLs.
    def parse_image_note(self, share_text: str) -> Dict[str, object]:
        """Parse a Xiaohongshu image note and return its pictures plus metadata.

        Args:
            share_text: a Xiaohongshu share link, or text containing one.

        Returns:
            {
                "note_id": str,
                "title": str,
                "desc": str,
                "type": "image",
                "images": [
                    {
                        "url_webp": str,  # WebP (small, good for previews)
                        "url_png": str,   # PNG (lossless, supports transparency)
                        "width": int,
                        "height": int
                    },
                    ...
                ]
            }

        Raises:
            ValueError: if the note id, page data, or images cannot be extracted.
            requests.HTTPError: if the page request returns an error status.
        """
        import time
        start_time = time.time()

        share_url = self._extract_first_url(share_text)
        logger.debug(f"[小红书图文] 提取到的链接: {share_url}")

        # Resolve the short link first (it redirects to the real note URL).
        t1 = time.time()
        resp = requests.get(share_url, headers=HEADERS_XHS_PC, timeout=self.timeout, allow_redirects=True)
        # Fail fast on HTTP errors instead of trying to parse an error page.
        resp.raise_for_status()
        logger.debug(f"[小红书图文] 页面请求耗时: {time.time()-t1:.2f}秒")

        final_url = resp.url
        html = resp.text

        # Extract the note id from the resolved URL (both /explore/ and
        # /discovery/item/ paths are supported).
        note_match = re.search(r'/(?:explore|discovery/item)/([a-z0-9]+)', final_url)
        if not note_match:
            raise ValueError("无法从链接中提取笔记 ID")

        note_id = note_match.group(1)
        logger.debug(f"[小红书图文] Note ID: {note_id}")

        # Pull the embedded __INITIAL_STATE__ JSON out of the page.
        t2 = time.time()
        data = self._extract_initial_state(html)
        if not data:
            raise ValueError("无法从页面中提取笔记数据")
        logger.debug(f"[小红书图文] JSON解析耗时: {time.time()-t2:.2f}秒")

        # Navigate to the note detail.
        try:
            note_map = data['note']['noteDetailMap']
            if note_id not in note_map:
                raise KeyError(f"笔记 {note_id} 不在数据中")

            note_info = note_map[note_id]['note']

            # Collect the image list.
            image_list = note_info.get('imageList', [])
            if not image_list:
                raise ValueError("笔记中没有找到图片")

            # Build image URLs in both WebP and PNG formats.
            t3 = time.time()
            images = []
            for img in image_list:
                # Prefer the WB_DFT entry from infoList.
                webp_url = None
                if 'infoList' in img:
                    for info in img['infoList']:
                        if info.get('imageScene') == 'WB_DFT':
                            webp_url = info.get('url')
                            break

                # Fall back to urlDefault.
                if not webp_url:
                    webp_url = img.get('urlDefault')

                if webp_url:
                    # Force HTTPS to avoid mixed-content issues.
                    webp_url = self._ensure_https(webp_url)

                    # Derive a PNG URL (logic adapted from a userscript).
                    png_url = self._convert_image_url_to_png(webp_url)

                    # Keep both formats so callers can pick by use case.
                    images.append({
                        'url_webp': webp_url,  # WebP: small, good for previews (HTTPS)
                        'url_png': png_url if png_url else webp_url,  # PNG: lossless (HTTPS)
                        'width': img.get('width'),
                        'height': img.get('height')
                    })

            # Strip characters that are illegal in file names.
            title = note_info.get('title', f'xhs_{note_id}')
            title = re.sub(r"[\\/:*?\"<>|]", "_", title).strip()

            logger.debug(f"[小红书图文] 图片URL处理耗时: {time.time()-t3:.2f}秒")

            total_time = time.time() - start_time
            logger.debug(f"[小红书图文] 解析完成,总耗时: {total_time:.2f}秒,图片数量: {len(images)}")
            logger.debug(f"{'='*60}\n")

            return {
                'note_id': note_id,
                'title': title,
                'desc': note_info.get('desc', ''),
                'type': 'image',
                'images': images
            }

        except (KeyError, TypeError) as e:
            # Chain the original error so the root cause stays visible.
            raise ValueError(f"解析笔记数据失败: {str(e)}") from e
  • Docstring providing input parameter description, output format specification, and usage instructions serving as the tool schema.
    """
    解析小红书分享链接,自动识别视频或图文类型并返回无水印资源
    
    参数:
    - share_link: 小红书分享链接或包含链接的文本
    
    返回:
    - 包含资源链接和信息的JSON字符串
    - 自动识别类型(video/image)并返回相应格式
    - 调用完成后,请将结果整理为以下纯文本格式并反馈给用户(禁止使用Markdown):
      标题(如无则留空):
      文案:
      视频/图片链接:
    - 返回时请保留完整的标题和文案,不要省略或截断任何内容
    - 若专用解析失败,将自动尝试 generic 兜底逻辑;调用方需同样按上述格式反馈结果
    - 抖音仅返回 caption 字段,标题需由调用方自行按需补充
    """
  • @mcp.tool() decorator registers the parse_xhs_link function as an MCP tool, automatically generating schema from signature and docstring.
    @mcp.tool()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Ryan7t/wanyi-watermark'

If you have feedback or need assistance with the MCP directory API, please join our Discord server