Skip to main content
Glama
xhs.utils.ts (7 kB)
/**
 * Utility functions for XHS operations
 */

import type { Page } from 'puppeteer';
import { logger } from './logger';

export const XHS_HOME_URL = 'https://www.xiaohongshu.com';
export const XHS_EXPLORE_URL = `${XHS_HOME_URL}/explore`;
export const XHS_SEARCH_URL = `${XHS_HOME_URL}/search_result`;
export const XHS_CREATOR_PUBLISH_URL =
  'https://creator.xiaohongshu.com/publish/publish?source=official';
/** Selector whose presence on the page is treated as "a user is logged in". */
export const LOGIN_OK_SELECTOR = '.main-container .user .link-wrapper .channel';

/**
 * Builds the search-result URL for a keyword.
 *
 * @param keyword - Search term; URL-encoded by `URLSearchParams`.
 * @returns `XHS_SEARCH_URL` with `keyword` and `source=web_explore_feed` query parameters.
 */
export function makeSearchUrl(keyword: string): string {
  const params = new URLSearchParams({
    keyword,
    source: 'web_explore_feed',
  });
  return `${XHS_SEARCH_URL}?${params.toString()}`;
}

/**
 * Builds the detail-page URL for a single feed item.
 *
 * @param feedId - Identifier appended as a path segment under `XHS_EXPLORE_URL`.
 * @param xsecToken - Value sent as the `xsec_token` query parameter.
 * @returns The explore URL for the feed with `xsec_token` and `xsec_source=pc_feed`.
 */
export function makeFeedDetailUrl(feedId: string, xsecToken: string): string {
  const params = new URLSearchParams({
    xsec_token: xsecToken,
    xsec_source: 'pc_feed',
  });
  return `${XHS_EXPLORE_URL}/${feedId}?${params.toString()}`;
}

/**
 * Script evaluated inside the page by `extractInitialState`.
 *
 * It is passed to Puppeteer as a string, so it runs in the browser and must
 * not reference anything from this module (in particular not `logger`).
 * It returns the JSON text of the first serializable state object, or ''.
 */
const INITIAL_STATE_SCRIPT = `
  (() => {
    // Serialize a value to JSON, replacing objects that were already visited
    // with "[Circular]". Returns null when serialization throws or does not
    // produce a string, so the caller can move on to the next candidate.
    const serialize = (value, onErrorArgs) => {
      try {
        const seen = new WeakSet();
        const json = JSON.stringify(value, (key, val) => {
          if (val != null && typeof val === "object") {
            if (seen.has(val)) {
              return "[Circular]";
            }
            seen.add(val);
          }
          return val;
        });
        return typeof json === 'string' ? json : null;
      } catch (e) {
        console.warn(...onErrorArgs, e && e.message);
        return null;
      }
    };

    // Try multiple possible state objects
    const possibleStates = [
      window.__INITIAL_STATE__,
      window.__INITIAL_SSR_STATE__,
      window.__NEXT_DATA__,
      window.__NUXT__,
      window.__VUE__,
      window.__REACT_QUERY_STATE__
    ];

    for (const state of possibleStates) {
      if (state && typeof state === 'object') {
        const json = serialize(state, ['JSON.stringify failed for state:']);
        if (json !== null) {
          return json;
        }
      }
    }

    // If no state found, try to find any global state
    const globalKeys = Object.keys(window).filter(key =>
      key.includes('STATE') || key.includes('DATA') || key.includes('INITIAL')
    );

    for (const key of globalKeys) {
      let value;
      try {
        value = window[key];
      } catch (e) {
        // A throwing getter on window should not abort the whole scan.
        continue;
      }
      if (value && typeof value === 'object') {
        const json = serialize(value, ['JSON.stringify failed for global key:', key]);
        if (json !== null) {
          return json;
        }
      }
    }

    return '';
  })()
`;

/**
 * Extracts a framework "initial state" object from the current page.
 *
 * Waits one second, then evaluates `INITIAL_STATE_SCRIPT` in the page. The
 * script checks a fixed list of well-known globals and then any `window` key
 * containing `STATE`, `DATA` or `INITIAL`, and returns the first one that can
 * be serialized to JSON. Objects encountered more than once during
 * serialization are replaced by the string `"[Circular]"`.
 *
 * @param page - Puppeteer page to inspect.
 * @returns The parsed state object, or `null` when nothing was found, the
 *   evaluation failed, or the serialized value is not a JSON object/array.
 */
export async function extractInitialState(page: Page): Promise<Record<string, unknown> | null> {
  // Puppeteer doesn't have waitForLoadState, so just give the page a moment to settle.
  await new Promise<void>((resolve) => globalThis.setTimeout(resolve, 1000));

  try {
    const result: unknown = await page.evaluate(INITIAL_STATE_SCRIPT);
    if (typeof result !== 'string' || result === '') {
      return null;
    }

    const parsed: unknown = JSON.parse(result);
    if (parsed === null || typeof parsed !== 'object') {
      return null;
    }
    return parsed as Record<string, unknown>;
  } catch {
    // Best effort: evaluation or parsing problems are reported as "no state".
    return null;
  }
}

/**
 * Reports whether the page shows the logged-in marker element.
 *
 * @param page - Puppeteer page to query.
 * @returns `true` when at least one element matches `LOGIN_OK_SELECTOR`;
 *   `false` when none match or the query throws.
 */
export async function isLoggedIn(page: Page): Promise<boolean> {
  try {
    const elements = await page.$$(LOGIN_OK_SELECTOR);
    return elements.length > 0;
  } catch {
    return false;
  }
}

/**
 * Checks login state and, when logged in, scrapes whatever profile details
 * are present in the current page's DOM.
 *
 * Profile extraction is best effort: a failure inside the page evaluation is
 * logged and results in `profile` being omitted, never in a rejected promise.
 *
 * @param page - Puppeteer page to inspect.
 * @returns `isLoggedIn`, plus `profile` when at least one field was extracted.
 */
export async function getLoginStatusWithProfile(page: Page): Promise<{
  isLoggedIn: boolean;
  profile?: {
    userId?: string;
    nickname?: string;
    username?: string;
    avatar?: string;
    followers?: number;
    following?: number;
    likes?: number;
    xhsNumber?: string;
    ipLocation?: string;
    profileUrl?: string;
  };
}> {
  try {
    // First check if logged in using the same selector as isLoggedIn
    const elements = await page.$$(LOGIN_OK_SELECTOR);
    if (elements.length === 0) {
      return { isLoggedIn: false };
    }

    // If logged in, try to extract profile information from current page
    let profileData: Record<string, unknown> = {};

    try {
      profileData = await page.evaluate(() => {
        const profile: Record<string, unknown> = {};

        // Extract user ID from URL if on profile page
        // eslint-disable-next-line no-undef
        const urlMatch = window.location.href.match(/\/user\/profile\/([a-f0-9]+)/);
        if (urlMatch) {
          profile.userId = urlMatch[1];
        }

        // Try to find user nickname
        // eslint-disable-next-line no-undef
        const nameElement = document.querySelector(
          '.user-name, [class*="user-name"], [class*="nickname"]'
        );
        if (nameElement) {
          profile.nickname = nameElement.textContent?.trim();
        }

        // Try to find user info text that might contain stats
        // eslint-disable-next-line no-undef
        const infoElement = document.querySelector('.user-info, [class*="user-info"]');
        if (infoElement) {
          const infoText = infoElement.textContent || '';
          profile.infoText = infoText;

          // The first three digit runs are read as following, followers, likes.
          // NOTE(review): this assumes no other number (e.g. the XHS number)
          // precedes the stats in the text — confirm against the live markup.
          const numbers = infoText.match(/\d+/g);
          if (numbers && numbers.length >= 3) {
            profile.following = parseInt(numbers[0], 10) || 0;
            profile.followers = parseInt(numbers[1], 10) || 0;
            profile.likes = parseInt(numbers[2], 10) || 0;
          }

          // Extract 小红书号 (XHS number)
          const xhsMatch = infoText.match(/小红书号:(\d+)/);
          if (xhsMatch) {
            profile.xhsNumber = xhsMatch[1];
          }

          // Extract IP属地 (IP location)
          const ipMatch = infoText.match(/IP属地:([^0-9]+)/);
          if (ipMatch) {
            profile.ipLocation = ipMatch[1].trim();
          }
        }

        // Try to find avatar; the generic keeps the result typed as
        // HTMLImageElement | null instead of asserting away the null.
        // eslint-disable-next-line no-undef
        const avatarElement = document.querySelector<HTMLImageElement>(
          'img[class*="avatar"], img[class*="profile"], .avatar img, .profile img'
        );
        if (avatarElement) {
          profile.avatar = avatarElement.src;
        }

        return profile;
      });
    } catch {
      logger.error('Error in page.evaluate');
      profileData = {};
    }

    return {
      isLoggedIn: true,
      profile: Object.keys(profileData).length > 0 ? profileData : undefined,
    };
  } catch {
    // If there's an error, fall back to the basic login check, which itself
    // reports false when the query fails again.
    return { isLoggedIn: await isLoggedIn(page) };
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Algovate/xhs-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.