// Weibo MCP Server
// by Selenium39
// Verified
import axios from 'axios';
import { DEFAULT_HEADERS, PROFILE_URL, FEEDS_URL, HOT_SEARCH_URL, SEARCH_CONTENT_URL } from './consts';
import { PagedFeeds, SearchResult, HotSearchItem, ContentSearchResult } from './types';
/**
 * Weibo crawler: fetches user profiles, user feeds, user search results,
 * the hot-search list and content search results over HTTP via axios.
 *
 * Every public method is best-effort: request or parsing failures are logged
 * with `console.error` and an empty (or partial) result is returned instead of
 * throwing.
 */
export class WeiboCrawler {
  /**
   * Fetches the profile information of a Weibo user.
   *
   * @param uid Unique identifier of the Weibo user
   * @returns The `userInfo` object from the profile response, or an empty
   *          object when the request fails or the response carries no `userInfo`
   */
  async extractWeiboProfile(uid: number): Promise<Record<string, any>> {
    try {
      const response = await axios.get(PROFILE_URL.replace('{userId}', uid.toString()), {
        headers: DEFAULT_HEADERS
      });
      // Fall back to {} so the documented "empty object on failure" contract
      // also holds when the payload is well-formed but lacks userInfo.
      return response.data?.data?.userInfo ?? {};
    } catch (error) {
      console.error(`无法获取UID为'${uid}'的用户资料`, error);
      return {};
    }
  }

  /**
   * Collects a user's feed cards page by page until `limit` is reached.
   *
   * @param uid Unique identifier of the Weibo user
   * @param limit Maximum number of feed cards to return
   * @returns Up to `limit` feed cards; whatever was collected so far is
   *          returned if a later step fails
   */
  async extractWeiboFeeds(uid: number, limit: number): Promise<Record<string, any>[]> {
    const feeds: Record<string, any>[] = [];
    let sinceId = '';
    try {
      const containerId = await this.getContainerId(uid);
      if (!containerId) return feeds;

      while (feeds.length < limit) {
        const pagedFeeds = await this.extractFeeds(uid, containerId, sinceId);
        if (!pagedFeeds.Feeds || pagedFeeds.Feeds.length === 0) {
          break;
        }
        feeds.push(...pagedFeeds.Feeds);

        const nextSinceId = pagedFeeds.SinceId ? String(pagedFeeds.SinceId) : '';
        // Stop when there is no cursor, or when the cursor did not advance:
        // requesting the same cursor again would only re-fetch the same page
        // and fill the result with duplicates.
        if (!nextSinceId || nextSinceId === sinceId) {
          break;
        }
        sinceId = nextSinceId;
      }
    } catch (error) {
      console.error(`无法获取UID为'${uid}'的动态`, error);
    }
    return feeds.slice(0, limit);
  }

  /**
   * Searches Weibo users by keyword.
   *
   * @param keyword Search term used to find users
   * @param limit Maximum number of users to return
   * @returns Matching users as `SearchResult` objects; empty on failure or
   *          when the response has fewer than two cards
   */
  async searchWeiboUsers(keyword: string, limit: number): Promise<SearchResult[]> {
    try {
      // URLSearchParams percent-encodes the '=' and '&' embedded in containerid.
      const queryString = new URLSearchParams({
        containerid: `100103type=3&q=${keyword}&t=`,
        page_type: 'searchall'
      }).toString();
      const response = await axios.get(`https://m.weibo.cn/api/container/getIndex?${queryString}`, {
        headers: DEFAULT_HEADERS
      });

      const cards = response.data?.data?.cards;
      if (!Array.isArray(cards) || cards.length < 2) {
        return [];
      }
      // The user list is read from the second card's card_group.
      const cardGroup = cards[1]?.card_group;
      if (!Array.isArray(cardGroup)) {
        return [];
      }
      return cardGroup
        // Skip entries without a user so one odd card does not void the whole search.
        .filter((item: any) => item?.user)
        .map((item: any) => this.toSearchResult(item.user))
        .slice(0, limit);
    } catch (error) {
      console.error(`无法搜索关键词为'${keyword}'的用户`, error);
      return [];
    }
  }

  /**
   * Converts a raw user object from the API into a `SearchResult`.
   *
   * @param user Raw user data from the Weibo API
   * @returns The user's id, nickname, HD avatar and description
   */
  private toSearchResult(user: any): SearchResult {
    return {
      id: user.id,
      nickName: user.screen_name,
      avatarHD: user.avatar_hd,
      description: user.description
    };
  }

  /**
   * Looks up the container id of the user's feed tab (the tab whose
   * `tabKey` is `'weibo'`) in the profile response.
   *
   * @param uid Unique identifier of the Weibo user
   * @returns The container id, or null when it is missing or the request fails
   */
  private async getContainerId(uid: number): Promise<string | null> {
    try {
      const response = await axios.get(PROFILE_URL.replace('{userId}', uid.toString()), {
        headers: DEFAULT_HEADERS
      });
      const tabs = response.data?.data?.tabsInfo?.tabs;
      if (!Array.isArray(tabs)) {
        return null;
      }
      const weiboTab = tabs.find((tab: any) => tab?.tabKey === 'weibo');
      return weiboTab?.containerid ?? null;
    } catch (error) {
      console.error(`无法获取UID为'${uid}'的containerId`, error);
      return null;
    }
  }

  /**
   * Fetches a single page of a user's feed.
   *
   * @param uid Unique identifier of the Weibo user
   * @param containerId Container id of the user's feed
   * @param sinceId Pagination cursor returned by the previous page ('' for the first page)
   * @returns The page's cards plus the cursor for the next page; an empty
   *          page with a null cursor when the request fails
   */
  private async extractFeeds(uid: number, containerId: string, sinceId: string): Promise<PagedFeeds> {
    try {
      // Replacer functions keep '$'-sequences in the values literal; a plain
      // replacement string would interpret '$&', '$1', ... as patterns.
      const url = FEEDS_URL
        .replace('{userId}', uid.toString())
        .replace('{containerId}', () => containerId)
        .replace('{sinceId}', () => sinceId);
      const response = await axios.get(url, { headers: DEFAULT_HEADERS });
      const payload = response.data?.data;
      const newSinceId = payload?.cardlistInfo?.since_id || '';
      const cards = Array.isArray(payload?.cards) ? payload.cards : [];
      return { SinceId: newSinceId, Feeds: cards };
    } catch (error) {
      console.error(`无法获取UID为'${uid}'的动态`, error);
      return { SinceId: null, Feeds: [] };
    }
  }

  /**
   * Fetches the Weibo hot-search list.
   *
   * @param limit Maximum number of hot-search entries to return
   * @returns Hot-search entries ranked from 1 in response order; empty on failure
   */
  async getHotSearchList(limit: number): Promise<HotSearchItem[]> {
    try {
      const response = await axios.get(HOT_SEARCH_URL, {
        headers: DEFAULT_HEADERS
      });
      const cards = response.data?.data?.cards;
      if (!Array.isArray(cards) || cards.length === 0) {
        return [];
      }

      // The hot-search entries are taken from the first card that has a card_group array.
      const hotSearchCard = cards.find((card: any) => Array.isArray(card?.card_group));
      if (!hotSearchCard) {
        return [];
      }

      const hotSearchItems: HotSearchItem[] = [];
      let rank = 1;
      for (const item of hotSearchCard.card_group) {
        // Negated form on purpose: also stops when limit is not a comparable number.
        if (!(rank <= limit)) {
          break;
        }
        // Entries without a description are skipped and do not consume a rank.
        if (!item?.desc) {
          continue;
        }
        const icon = item.icon;
        hotSearchItems.push({
          keyword: item.desc,
          rank: rank,
          hotValue: WeiboCrawler.parseHotValue(item.desc_extr),
          // Tag is the icon's file name without the '.png' extension.
          tag: typeof icon === 'string' && icon
            ? icon.slice(icon.lastIndexOf('/') + 1).replace('.png', '')
            : undefined,
          url: item.scheme
        });
        rank++;
      }
      return hotSearchItems;
    } catch (error) {
      console.error('无法获取微博热搜榜', error);
      return [];
    }
  }

  /**
   * Extracts the integer heat value from a raw `desc_extr` field.
   * Accepts a number or a string containing digits (optionally prefixed by
   * other text) and returns 0 when no integer can be found, so callers never
   * receive NaN.
   */
  private static parseHotValue(raw: unknown): number {
    if (typeof raw === 'number') {
      return Number.isFinite(raw) ? Math.trunc(raw) : 0;
    }
    if (typeof raw !== 'string') {
      return 0;
    }
    const digits = raw.match(/-?\d+/);
    return digits ? parseInt(digits[0], 10) : 0;
  }

  /**
   * Searches Weibo posts by keyword, walking result pages until `limit`
   * posts are collected or no further page is available.
   *
   * @param keyword Search keyword
   * @param limit Maximum number of posts to return
   * @param page Page number to start from (defaults to 1)
   * @returns Up to `limit` posts; if a request fails part-way, the posts
   *          gathered from earlier pages are still returned
   */
  async searchWeiboContent(keyword: string, limit: number, page: number = 1): Promise<ContentSearchResult[]> {
    const results: ContentSearchResult[] = [];
    try {
      let currentPage = page;
      while (results.length < limit) {
        const url = SEARCH_CONTENT_URL
          .replace('{keyword}', encodeURIComponent(keyword))
          .replace('{page}', currentPage.toString());
        const response = await axios.get(url, {
          headers: DEFAULT_HEADERS
        });
        const data = response.data;

        const contentCards = this.collectContentCards(data?.data?.cards);
        if (contentCards.length === 0) {
          break; // No more content.
        }

        for (const card of contentCards) {
          if (results.length >= limit) {
            break;
          }
          const mblog = card?.mblog;
          // Posts without a body or without author data cannot be mapped to a
          // complete ContentSearchResult; skip them rather than failing the search.
          if (!mblog || !mblog.user) continue;
          results.push(this.toContentResult(mblog));
        }

        currentPage++;
        // Stop when the response carries no page marker (or reports page "1").
        if (!data?.data?.cardlistInfo?.page || data.data.cardlistInfo.page === "1") {
          break;
        }
      }
    } catch (error) {
      console.error(`无法搜索关键词为'${keyword}'的微博内容`, error);
    }
    return results.slice(0, limit);
  }

  /**
   * Gathers the post cards (card_type 9) from a response's card list, taking
   * both top-level cards and cards nested inside a card_group.
   */
  private collectContentCards(cards: any): any[] {
    if (!Array.isArray(cards)) {
      return [];
    }
    const contentCards: any[] = [];
    for (const card of cards) {
      if (card?.card_type === 9) {
        contentCards.push(card);
      } else if (Array.isArray(card?.card_group)) {
        contentCards.push(...card.card_group.filter((item: any) => item?.card_type === 9));
      }
    }
    return contentCards;
  }

  /**
   * Maps a raw post (`mblog`) that has author data to a `ContentSearchResult`,
   * including its picture URLs and, for video posts, the first available video URL.
   */
  private toContentResult(mblog: any): ContentSearchResult {
    const pics: string[] = Array.isArray(mblog.pics)
      ? mblog.pics.filter((pic: any) => pic?.url).map((pic: any) => pic.url)
      : [];

    let videoUrl: string | undefined;
    const pageInfo = mblog.page_info;
    if (pageInfo && pageInfo.type === 'video') {
      // Prefer the stream URL, then fall back through the listed mp4 variants.
      videoUrl = pageInfo.media_info?.stream_url ||
        pageInfo.urls?.mp4_720p_mp4 ||
        pageInfo.urls?.mp4_hd_mp4 ||
        pageInfo.urls?.mp4_ld_mp4;
    }

    return {
      id: mblog.id,
      text: mblog.text,
      created_at: mblog.created_at,
      reposts_count: mblog.reposts_count,
      comments_count: mblog.comments_count,
      attitudes_count: mblog.attitudes_count,
      user: {
        id: mblog.user.id,
        screen_name: mblog.user.screen_name,
        profile_image_url: mblog.user.profile_image_url,
        verified: mblog.user.verified
      },
      pics: pics.length > 0 ? pics : undefined,
      video_url: videoUrl
    };
  }
}