Weibo MCP Server
by qinyuanpei
mcp-server-weibo/src/mcp_server_weibo/
import httpx
import logging
from urllib.parse import urlencode
from .consts import DEFAULT_HEADERS, PROFILE_URL, FEEDS_URL
from .schemas import PagedFeeds, SearchResult
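# Note: the .consts and .schemas modules are not shown on this page. A minimal
# sketch of what they plausibly contain, inferred from how they are used below
# (the endpoint URLs and header values are assumptions, not confirmed here):
#
#   DEFAULT_HEADERS = {"User-Agent": "Mozilla/5.0 (...)"}  # mimic a mobile browser
#   PROFILE_URL = "https://m.weibo.cn/api/container/getIndex?type=uid&value={userId}"
#   FEEDS_URL = ("https://m.weibo.cn/api/container/getIndex"
#                "?type=uid&value={userId}&containerid={containerId}&since_id={sinceId}")
#
#   class SearchResult(BaseModel):  # pydantic-style model, assumed
#       id: int
#       nickName: str
#       avatarHD: str
#       description: str
#
#   class PagedFeeds(BaseModel):
#       SinceId: str | int | None
#       Feeds: list[dict]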
class WeiboCrawler:
    """
    A crawler class for extracting data from Weibo (the Chinese social media platform).
    Provides functionality to fetch user profiles, feeds, and search for users.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
    async def extract_weibo_profile(self, uid: int) -> dict:
        """
        Extract user profile information from Weibo.

        Args:
            uid (int): The unique identifier of the Weibo user

        Returns:
            dict: User profile information, or an empty dict if extraction fails
        """
        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(PROFILE_URL.format(userId=uid), headers=DEFAULT_HEADERS)
                response.raise_for_status()  # surface 4xx/5xx as httpx.HTTPStatusError
                result = response.json()
                return result["data"]["userInfo"]
            except httpx.HTTPError:
                self.logger.error(f"Unable to extract profile for uid '{uid}'", exc_info=True)
                return {}
    async def extract_weibo_feeds(self, uid: int, limit: int) -> list[dict]:
        """
        Extract a user's Weibo feeds (posts) with pagination support.

        Args:
            uid (int): The unique identifier of the Weibo user
            limit (int): Maximum number of feeds to extract

        Returns:
            list: List of the user's Weibo feeds
        """
        feeds = []
        sinceId = ''
        async with httpx.AsyncClient() as client:
            containerId = await self._get_container_id(client, uid)
            if not containerId:
                return feeds
            while len(feeds) < limit:
                pagedFeeds = await self._extract_feeds(client, uid, containerId, sinceId)
                if not pagedFeeds.Feeds:
                    break
                feeds.extend(pagedFeeds.Feeds)
                # The API returns a since_id cursor for the next page; an empty
                # cursor means there are no further pages.
                sinceId = pagedFeeds.SinceId
                if not sinceId:
                    break
        return feeds[:limit]
    async def search_weibo_users(self, keyword: str, limit: int) -> list[SearchResult]:
        """
        Search for Weibo users by keyword.

        Args:
            keyword (str): Search term used to find users
            limit (int): Maximum number of users to return

        Returns:
            list: List of SearchResult objects containing user information
        """
        async with httpx.AsyncClient() as client:
            try:
                params = {'containerid': f'100103type=3&q={keyword}&t=', 'page_type': 'searchall'}
                encoded_params = urlencode(params)
                response = await client.get(f'https://m.weibo.cn/api/container/getIndex?{encoded_params}', headers=DEFAULT_HEADERS)
                response.raise_for_status()  # surface 4xx/5xx as httpx.HTTPStatusError
                result = response.json()
                cards = result["data"]["cards"]
                if len(cards) < 2:
                    return []
                cardGroup = cards[1]['card_group']
                return [self._to_search_result(item['user']) for item in cardGroup][:limit]
            except httpx.HTTPError:
                self.logger.error(f"Unable to search users for keyword '{keyword}'", exc_info=True)
                return []
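    # On the m.weibo.cn container API, the containerid '100103type=3&q=<keyword>'
    # selects the user-search result container. Judging from how the response is
    # read above, cards[0] is a search header card and cards[1] holds the
    # card_group with the matched user entries; this layout is assumed, not
    # documented here.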
    def _to_search_result(self, user: dict) -> SearchResult:
        """
        Convert raw user data into a SearchResult object.

        Args:
            user (dict): Raw user data from the Weibo API

        Returns:
            SearchResult: Formatted user information
        """
        return SearchResult(
            id=user['id'],
            nickName=user['screen_name'],
            avatarHD=user['avatar_hd'],
            description=user['description']
        )
    async def _get_container_id(self, client, uid: int):
        """
        Get the container ID for a user's Weibo feed.

        Args:
            client (httpx.AsyncClient): HTTP client instance
            uid (int): The unique identifier of the Weibo user

        Returns:
            str: Container ID for the user's feed, or None if extraction fails
        """
        try:
            response = await client.get(PROFILE_URL.format(userId=str(uid)), headers=DEFAULT_HEADERS)
            response.raise_for_status()
            data = response.json()
            tabs_info = data.get("data", {}).get("tabsInfo", {}).get("tabs", [])
            for tab in tabs_info:
                if tab.get("tabKey") == "weibo":
                    return tab.get("containerid")
        except httpx.HTTPError:
            self.logger.error(f"Unable to extract containerId for uid '{uid}'", exc_info=True)
        return None
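    # Each tab of an m.weibo.cn profile page ("weibo", "album", and so on)
    # carries its own containerid; the one on the "weibo" tab addresses the
    # user's post feed, which is why the loop above filters on that tabKey.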
    async def _extract_feeds(self, client, uid: int, container_id: str, since_id: str):
        """
        Extract a single page of Weibo feeds for a user.

        Args:
            client (httpx.AsyncClient): HTTP client instance
            uid (int): The unique identifier of the Weibo user
            container_id (str): Container ID for the user's feed
            since_id (str): ID of the last feed, used for pagination

        Returns:
            PagedFeeds: Object containing the page's feeds and the next page's since_id
        """
        try:
            url = FEEDS_URL.format(userId=str(uid), containerId=container_id, sinceId=since_id)
            response = await client.get(url, headers=DEFAULT_HEADERS)
            response.raise_for_status()
            data = response.json()
            new_since_id = data.get("data", {}).get("cardlistInfo", {}).get("since_id", "")
            cards = data.get("data", {}).get("cards", [])
            # An empty card list signals that the last page has been reached.
            return PagedFeeds(SinceId=new_since_id, Feeds=cards)
        except httpx.HTTPError:
            self.logger.error(f"Unable to extract feeds for uid '{uid}'", exc_info=True)
            return PagedFeeds(SinceId=None, Feeds=[])
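
A minimal usage sketch (not part of the source above): driving WeiboCrawler from a script with asyncio. The import path assumes the package exports WeiboCrawler from mcp_server_weibo, and the keyword is a placeholder.

import asyncio

from mcp_server_weibo import WeiboCrawler  # assumed export, matching the package name

async def main():
    crawler = WeiboCrawler()

    # Look up users by keyword, then pull the first match's profile and posts.
    users = await crawler.search_weibo_users("tech", limit=5)
    if users:
        uid = users[0].id
        profile = await crawler.extract_weibo_profile(uid)
        feeds = await crawler.extract_weibo_feeds(uid, limit=20)
        print(profile.get("screen_name"), len(feeds))

asyncio.run(main())

Each public method opens its own httpx.AsyncClient, so the crawler object holds no connection state and can safely be shared across concurrent tasks.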