#!/usr/bin/env python3
"""
MCP server for fetching WeChat Official Account articles.
"""
import logging
from typing import Dict, List, Optional

import bs4
import requests
from mcp.server.fastmcp import FastMCP

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Create the FastMCP server instance
server = FastMCP(
    name="wechat-article-server",
    instructions="An MCP server for fetching WeChat Official Account article content",
    debug=False,
    log_level="INFO",
)

# Global session store
sessions: Dict[str, requests.Session] = {}


@server.tool()
def create_session(session_id: str) -> str:
    """
    Create a new session.

    Args:
        session_id: Session ID

    Returns:
        Result message
    """
    if session_id in sessions:
        return f"Session {session_id} already exists"
    sessions[session_id] = requests.Session()
    logger.info(f"Created new session: {session_id}")
    return f"Successfully created session: {session_id}"


@server.tool()
def list_sessions() -> List[str]:
    """
    List all active sessions.

    Returns:
        List of session IDs
    """
    return list(sessions.keys())


@server.tool()
def get_article_content(url: str, session_id: Optional[str] = None) -> str:
    """
    Fetch the content of a WeChat Official Account article.

    Args:
        url: URL of the WeChat article
        session_id: Optional session ID, used to reuse request state

    Returns:
        The article content
    """
    # Fall back to the plain requests module when no session ID is given
    # or the given ID is unknown
    session = sessions.get(session_id, requests) if session_id else requests
    try:
        response = session.get(url, headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        })
        response.raise_for_status()
        soup = bs4.BeautifulSoup(response.text, 'html.parser')
        # Extract the article title
        title_elem = soup.find('h1', class_='rich_media_title')
        title = title_elem.get_text().strip() if title_elem else "Title not found"
        # Extract the article body
        content_elem = soup.find('div', class_='rich_media_content')
        if not content_elem:
            content_elem = soup.find('div', id='js_content')
        content = content_elem.get_text().strip() if content_elem else "Content not found"
        # Extract the author
        author_elem = soup.find('a', class_='rich_media_meta_link')
        author = author_elem.get_text().strip() if author_elem else "Unknown author"
        # Extract the publish time
        time_elem = soup.find('em', class_='rich_media_meta_text')
        publish_time = time_elem.get_text().strip() if time_elem else "Unknown time"
        result = f"Title: {title}\n"
        result += f"Author: {author}\n"
        result += f"Publish time: {publish_time}\n"
        result += f"URL: {url}\n\n"
        result += "Content:\n"
        result += content
        return result
    except Exception as e:
        logger.error(f"Failed to fetch article content: {e}")
        return f"Failed to fetch article content: {e}"


@server.tool()
def extract_article_links(url: str, session_id: Optional[str] = None) -> List[str]:
    """
    Extract article links from a WeChat Official Account page.

    Args:
        url: URL of the WeChat Official Account page
        session_id: Optional session ID, used to reuse request state

    Returns:
        List of article links
    """
    # Fall back to the plain requests module when no session ID is given
    # or the given ID is unknown
    session = sessions.get(session_id, requests) if session_id else requests
    try:
        response = session.get(url, headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        })
        response.raise_for_status()
        soup = bs4.BeautifulSoup(response.text, 'html.parser')
        links = []
        # Keep links that point at individual articles (the /s path on mp.weixin.qq.com)
        for link in soup.find_all('a', href=True):
            href = link['href']
            if 'mp.weixin.qq.com' in href and '/s' in href:
                if not href.startswith('http'):
                    href = f"https://mp.weixin.qq.com{href}"
                links.append(href)
        return links
    except Exception as e:
        logger.error(f"Failed to extract article links: {e}")
        return [f"Failed to extract article links: {e}"]


@server.tool()
def close_session(session_id: str) -> str:
    """
    Close the given session.

    Args:
        session_id: ID of the session to close

    Returns:
        Result message
    """
    if session_id not in sessions:
        return f"Session {session_id} does not exist"
    sessions[session_id].close()
    del sessions[session_id]
    logger.info(f"Closed session: {session_id}")
    return f"Successfully closed session: {session_id}"


def main():
    """Main entry point."""
    logger.info("Starting the WeChat Official Account article MCP server")
    server.run(transport="stdio")


if __name__ == "__main__":
    main()
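
# ---------------------------------------------------------------------------
# Example client usage (a minimal sketch, not part of the server itself).
# It assumes the official MCP Python SDK's stdio client (ClientSession,
# StdioServerParameters, stdio_client); the script name "simple_main.py" and
# the article URL below are placeholders, adjust them to your setup.
#
#   import asyncio
#   from mcp import ClientSession, StdioServerParameters
#   from mcp.client.stdio import stdio_client
#
#   async def demo():
#       params = StdioServerParameters(command="python", args=["simple_main.py"])
#       async with stdio_client(params) as (read, write):
#           async with ClientSession(read, write) as session:
#               await session.initialize()
#               await session.call_tool("create_session", {"session_id": "demo"})
#               result = await session.call_tool(
#                   "get_article_content",
#                   {"url": "https://mp.weixin.qq.com/s/...", "session_id": "demo"},
#               )
#               print(result)
#
#   asyncio.run(demo())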