parse_xhs_link

Extracts watermark-free videos and images from Xiaohongshu (RedNote) share links by parsing the shared URL and returning the clean resource data as a structured JSON string.

Instructions

Parses a Xiaohongshu share link, automatically detects whether it points to a video or an image note, and returns the watermark-free resources.

Parameters:
- share_link: a Xiaohongshu share link, or any text containing one

Returns:
- a JSON string with the resource URLs and note information
- the note type (video/image) is detected automatically and the matching format is returned
- after the call completes, format the result as the following plain-text template and send it back to the user (no Markdown):
  标题(如无则留空): (title, leave empty if none)
  文案: (caption)
  视频/图片链接: (video/image links)
- keep the full title and caption; do not omit or truncate anything
- if the dedicated parser fails, a generic fallback is attempted automatically; the caller should report that result in the same format
- for Douyin, only the caption field is returned; the caller should add a title as needed
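
As an illustration of the expected hand-off, the sketch below parses a hypothetical tool result and renders the plain-text template described above. Every field value in it is an invented placeholder; only the keys mirror the handler shown under Implementation Reference.

    import json

    # Hypothetical tool result for a video note; all values are invented
    # placeholders, only the keys mirror the parse_xhs_link handler below.
    result = json.loads("""
    {
      "status": "success",
      "type": "video",
      "platform": "xiaohongshu",
      "note_id": "example_note_id",
      "title": "示例标题",
      "caption": "示例文案",
      "url": "https://sns-video-example.xhscdn.com/placeholder.mp4"
    }
    """)

    # Render the plain-text template requested above (no Markdown).
    print(f"标题(如无则留空): {result.get('title', '')}")
    print(f"文案: {result.get('caption', '')}")
    print(f"视频/图片链接: {result.get('url', '')}")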

Input Schema

Name          Required    Description                                         Default
share_link    Yes         Xiaohongshu share link, or text containing one      (none)
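
For reference, an MCP client reaches this tool through the protocol's standard tools/call request. The sketch below shows a minimal payload under that assumption; the request id and the share-link text are made-up examples.

    import json

    # Minimal JSON-RPC "tools/call" payload an MCP client would send for this
    # tool; the id and the share link text are made-up examples.
    request = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": "parse_xhs_link",
            "arguments": {
                "share_link": "49 小红书笔记分享 https://xhslink.com/placeholder 复制本条信息,打开【小红书】App查看精彩内容!"
            }
        }
    }
    print(json.dumps(request, ensure_ascii=False, indent=2))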

Implementation Reference

  • Primary handler function for the 'parse_xhs_link' tool. Decorated with @mcp.tool() for registration. Orchestrates video/image detection, calls XiaohongshuProcessor methods, handles errors and generic fallback.
    @mcp.tool()
    def parse_xhs_link(share_link: str) -> str:
        """
        解析小红书分享链接,自动识别视频或图文类型并返回无水印资源

        参数:
        - share_link: 小红书分享链接或包含链接的文本

        返回:
        - 包含资源链接和信息的JSON字符串
        - 自动识别类型(video/image)并返回相应格式
        - 调用完成后,请将结果整理为以下纯文本格式并反馈给用户(禁止使用Markdown):
          标题(如无则留空):
          文案:
          视频/图片链接:
        - 返回时请保留完整的标题和文案,不要省略或截断任何内容
        - 若专用解析失败,将自动尝试 generic 兜底逻辑;调用方需同样按上述格式反馈结果
        - 抖音仅返回 caption 字段,标题需由调用方自行按需补充
        """
        try:
            processor = XiaohongshuProcessor()

            # 先尝试解析视频
            try:
                video_info = processor.parse_share_url(share_link)
                return json.dumps({
                    "status": "success",
                    "type": "video",
                    "platform": "xiaohongshu",
                    "note_id": video_info.get("note_id", ""),
                    "title": video_info["title"],
                    "caption": video_info.get("desc", ""),
                    "url": video_info["url"],
                    "description": f"视频标题: {video_info['title']}"
                }, ensure_ascii=False, indent=2)
            except Exception as video_error:
                # 如果视频解析失败,尝试图文解析
                error_msg = str(video_error).lower()
                if "未从页面中发现可用视频直链" in error_msg or "video" in error_msg or "候选" in error_msg:
                    try:
                        note_data = processor.parse_image_note(share_link)
                        return json.dumps({
                            "status": "success",
                            "type": "image",
                            "platform": "xiaohongshu",
                            "note_id": note_data["note_id"],
                            "title": note_data["title"],
                            "desc": note_data["desc"],
                            "caption": note_data.get("desc", ""),
                            "image_count": len(note_data["images"]),
                            "images": note_data["images"],
                            "format_info": {
                                "webp": "轻量格式,体积小(约160KB),适合快速预览和节省带宽",
                                "png": "无损格式,高质量(约1.8MB),支持透明背景,适合编辑和打印"
                            }
                        }, ensure_ascii=False, indent=2)
                    except Exception as image_error:
                        return _generic_fallback(share_link, f"小红书图文解析失败: {image_error}")
                return _generic_fallback(share_link, f"小红书视频解析失败: {video_error}")
        except Exception as e:
            return _generic_fallback(share_link, f"解析小红书链接失败: {e}")
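  • Hypothetical sketch of the _generic_fallback helper the handler above delegates to; the helper itself is not reproduced in this reference, so the behaviour shown here (scan the input for any direct media link, otherwise return the error as JSON) is an assumption, not the repository's implementation.
    import json
    import re

    # Assumed behaviour only: the real _generic_fallback is not shown in this
    # reference. This sketch scans the input text for direct media links and
    # otherwise reports the failure reason as a JSON string.
    def _generic_fallback(share_link: str, reason: str) -> str:
        media = re.findall(
            r"https?://[^\s'\"]+\.(?:mp4|jpe?g|png|webp)",
            share_link,
            re.IGNORECASE,
        )
        if media:
            return json.dumps(
                {"status": "success", "type": "generic", "urls": media, "note": reason},
                ensure_ascii=False,
                indent=2,
            )
        return json.dumps(
            {"status": "error", "type": "generic", "message": reason},
            ensure_ascii=False,
            indent=2,
        )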
  • Core helper method in XiaohongshuProcessor class that parses video notes: fetches page, extracts and selects no-watermark video URL.
    def parse_share_url(self, share_text: str) -> Dict[str, str]:
        """解析小红书分享链接,返回视频信息:url/title/note_id

        解析策略:
        - 从 HTML 中抓取 <video src> 与 <meta og:video>
        - 通过启发式评分选择无水印直链
        """
        import time
        start_time = time.time()

        share_url = self._extract_first_url(share_text)
        logger.debug(f"[小红书视频] 提取到的链接: {share_url}")
        note_id = self._extract_note_id_from_path(share_url)

        t1 = time.time()
        html = self._fetch_html(share_url)
        logger.debug(f"[小红书视频] 页面请求耗时: {time.time()-t1:.2f}秒")

        # 标题:优先 og:title
        title = (
            self._extract_meta(html, "og:title")
            or self._extract_meta(html, "og:description", key="content")
            or (f"xhs_{note_id}" if note_id else "xhs")
        )
        # 清理非法文件名字符
        title = re.sub(r"[\\/:*?\"<>|]", "_", title).strip()

        # 候选直链
        candidates: List[Tuple[str, str]] = []
        # 1) video 标签
        for v in self._extract_all_video_src(html):
            candidates.append((v, "video"))
        # 2) og:video
        ogv = self._extract_meta(html, "og:video")
        if ogv:
            candidates.append((ogv, "og"))

        if not candidates:
            # 兜底:尝试在页面里扫所有以 xhscdn.com 结尾的 mp4
            for m in re.finditer(r"https?://[^\s'\"]+?\.mp4", html, re.IGNORECASE):
                if "xhscdn.com" in m.group(0):
                    candidates.append((m.group(0), "fallback"))

        logger.debug(f"[小红书视频] 找到 {len(candidates)} 个候选视频URL")

        t2 = time.time()
        final_url = self.get_watermark_free_url(candidates)
        logger.debug(f"[小红书视频] URL筛选耗时: {time.time()-t2:.2f}秒")

        total_time = time.time() - start_time
        logger.debug(f"[小红书视频] 解析完成,总耗时: {total_time:.2f}秒")
        logger.debug(f"{'='*60}\n")

        return {
            "url": final_url,
            "title": title,
            "note_id": note_id or "",
        }
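  • Sketch of the kind of heuristic scoring get_watermark_free_url applies to the (url, source) candidates collected above; the method is referenced but not shown here, so the weights below (prefer <video src> over og:video, penalise URLs that look watermarked) are illustrative assumptions.
    from typing import List, Tuple

    # Illustrative assumption of the candidate scoring; the actual
    # get_watermark_free_url implementation is not part of this reference.
    def pick_watermark_free_url(candidates: List[Tuple[str, str]]) -> str:
        if not candidates:
            raise ValueError("未从页面中发现可用视频直链")

        def score(item: Tuple[str, str]) -> int:
            url, source = item
            s = {"video": 2, "og": 1}.get(source, 0)  # prefer <video src>, then og:video
            if "watermark" in url.lower():
                s -= 5                                # push obviously watermarked links last
            return s

        return max(candidates, key=score)[0]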
  • Core helper method in XiaohongshuProcessor class that parses image notes: extracts metadata and dual-format (WebP/PNG) image URLs.
    def parse_image_note(self, share_text: str) -> Dict[str, any]:
        """解析小红书图文笔记,返回图片列表和笔记信息

        返回格式:
        {
            "note_id": str,
            "title": str,
            "desc": str,
            "type": "image",
            "images": [
                {
                    "url_webp": str,  # WebP 格式(体积小,适合预览)
                    "url_png": str,   # PNG 格式(无损高清,支持透明)
                    "width": int,
                    "height": int
                },
                ...
            ]
        }
        """
        import time
        start_time = time.time()

        share_url = self._extract_first_url(share_text)
        logger.debug(f"[小红书图文] 提取到的链接: {share_url}")

        # 先请求获取重定向后的真实 URL(短链接需要重定向)
        t1 = time.time()
        resp = requests.get(share_url, headers=HEADERS_XHS_PC, timeout=self.timeout, allow_redirects=True)
        logger.debug(f"[小红书图文] 页面请求耗时: {time.time()-t1:.2f}秒")
        final_url = resp.url
        html = resp.text

        # 从真实 URL 中提取 note_id(支持 /explore/ 和 /discovery/item/ 两种路径)
        note_match = re.search(r'/(?:explore|discovery/item)/([a-z0-9]+)', final_url)
        if not note_match:
            raise ValueError("无法从链接中提取笔记 ID")
        note_id = note_match.group(1)
        logger.debug(f"[小红书图文] Note ID: {note_id}")

        # 提取 __INITIAL_STATE__ 数据
        t2 = time.time()
        data = self._extract_initial_state(html)
        if not data:
            raise ValueError("无法从页面中提取笔记数据")
        logger.debug(f"[小红书图文] JSON解析耗时: {time.time()-t2:.2f}秒")

        # 导航到笔记详情
        try:
            note_map = data['note']['noteDetailMap']
            if note_id not in note_map:
                raise KeyError(f"笔记 {note_id} 不在数据中")

            note_info = note_map[note_id]['note']

            # 提取图片列表
            image_list = note_info.get('imageList', [])
            if not image_list:
                raise ValueError("笔记中没有找到图片")

            # 处理图片 URL(同时提供 WebP 和 PNG 两种格式)
            t3 = time.time()
            images = []
            for img in image_list:
                # 优先从 infoList 中选择 WB_DFT
                webp_url = None
                if 'infoList' in img:
                    for info in img['infoList']:
                        if info.get('imageScene') == 'WB_DFT':
                            webp_url = info.get('url')
                            break

                # 回退到 urlDefault
                if not webp_url:
                    webp_url = img.get('urlDefault')

                if webp_url:
                    # 确保 WebP URL 使用 HTTPS(避免混合内容问题)
                    webp_url = self._ensure_https(webp_url)

                    # 转换为 PNG URL(借鉴油猴脚本逻辑)
                    png_url = self._convert_image_url_to_png(webp_url)

                    # 同时保留两种格式
                    images.append({
                        'url_webp': webp_url,                         # WebP 格式(体积小,适合预览)- HTTPS
                        'url_png': png_url if png_url else webp_url,  # PNG 格式(无损高清)- HTTPS
                        'width': img.get('width'),
                        'height': img.get('height')
                    })

            # 清理标题中的非法字符
            title = note_info.get('title', f'xhs_{note_id}')
            title = re.sub(r"[\\/:*?\"<>|]", "_", title).strip()

            logger.debug(f"[小红书图文] 图片URL处理耗时: {time.time()-t3:.2f}秒")

            total_time = time.time() - start_time
            logger.debug(f"[小红书图文] 解析完成,总耗时: {total_time:.2f}秒,图片数量: {len(images)}")
            logger.debug(f"{'='*60}\n")

            return {
                'note_id': note_id,
                'title': title,
                'desc': note_info.get('desc', ''),
                'type': 'image',
                'images': images
            }
        except (KeyError, TypeError) as e:
            raise ValueError(f"解析笔记数据失败: {str(e)}")
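  • Hypothetical sketch of the _extract_initial_state helper used above: a common approach is to pull the window.__INITIAL_STATE__ assignment out of the HTML and parse it as JSON. The exact regex and the undefined-to-null replacement are assumptions about how the repository does this, not quoted code.
    import json
    import re
    from typing import Optional

    # Assumed implementation: grab the window.__INITIAL_STATE__ blob embedded
    # in the page and turn it into a dict. Xiaohongshu pages contain literal
    # JavaScript "undefined" values, so they are mapped to null before parsing.
    def _extract_initial_state(html: str) -> Optional[dict]:
        match = re.search(
            r"window\.__INITIAL_STATE__\s*=\s*(\{.*?\})\s*</script>",
            html,
            re.DOTALL,
        )
        if not match:
            return None
        raw = match.group(1).replace("undefined", "null")
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return None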
  • Docstring providing the input parameter description, the output format specification, and usage instructions; it doubles as the tool's schema description.
    """ 解析小红书分享链接,自动识别视频或图文类型并返回无水印资源 参数: - share_link: 小红书分享链接或包含链接的文本 返回: - 包含资源链接和信息的JSON字符串 - 自动识别类型(video/image)并返回相应格式 - 调用完成后,请将结果整理为以下纯文本格式并反馈给用户(禁止使用Markdown): 标题(如无则留空): 文案: 视频/图片链接: - 返回时请保留完整的标题和文案,不要省略或截断任何内容 - 若专用解析失败,将自动尝试 generic 兜底逻辑;调用方需同样按上述格式反馈结果 - 抖音仅返回 caption 字段,标题需由调用方自行按需补充 """
  • @mcp.tool() decorator registers the parse_xhs_link function as an MCP tool, automatically generating schema from signature and docstring.
    @mcp.tool()
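  • Minimal registration sketch for context, assuming the server is built on the official MCP Python SDK's FastMCP class; the server name and the run call are illustrative, not taken from this repository.
    from mcp.server.fastmcp import FastMCP

    # Illustrative wiring only: a FastMCP instance whose @mcp.tool() decorator
    # turns the function signature and docstring into the tool schema.
    mcp = FastMCP("wanyi-watermark")

    @mcp.tool()
    def parse_xhs_link(share_link: str) -> str:
        """解析小红书分享链接,自动识别视频或图文类型并返回无水印资源"""
        ...  # full implementation shown above

    if __name__ == "__main__":
        mcp.run()  # defaults to the stdio transport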
