Skip to main content
Glama

WeChat Article Reader MCP Server

by whbfxy
wechat_tools.py (23.2 kB)
"""
MCP tool implementation module.

Implements the MCP tools used to read WeChat Official Account articles:
fetching an article, searching articles (currently a stub that returns no
results) and extracting a short summary of an article.
"""

import asyncio
import json
import logging
import re
from typing import Any, Dict, List, Optional

import mcp.types as types
from mcp.server import Server
from mcp.server.models import InitializationOptions

from ..models import (
    FetchArticleRequest,
    FetchArticleResponse,
    SearchArticlesRequest,
    SearchArticlesResponse,
    SearchQuery,
    ExtractArticleSummaryRequest,
    ExtractArticleSummaryResponse,
    ArticleMetadata,
    ArticleSummary,
    ContentFormat,
    SortOrder,
    create_error_response,
    generate_request_id,
    BrowserConfig,
    RequestConfig,
    ResponseMetadata,
)
from ..utils import (
    BrowserClient,
    BrowserError,
    NetworkError,
    ParseError,
    ValidationError,
    format_mcp_tool_response,
    WeChatArticleParser,
    get_browser_client,
)

# Module-level logger
logger = logging.getLogger(__name__)

# Article URLs look like https://mp.weixin.qq.com/s/<id> or
# https://mp.weixin.qq.com/s?<query>. The path segment must be exactly "s",
# so "/search" or "/static/..." are rejected.
_WECHAT_ARTICLE_URL_RE = re.compile(r'^https?://mp\.weixin\.qq\.com/s(?:[/?#]|$)')

# Sentence terminators considered when truncating a summary: the full-width
# (CJK) forms followed by their ASCII counterparts.
_SENTENCE_ENDINGS = ('。', '!', '?', '.', '!', '?')

# Marker appended when a summary is cut in the middle of a sentence.
_ELLIPSIS = "..."

# Summary length used when the caller passes ``None``.
_DEFAULT_SUMMARY_LENGTH = 200

# Article attributes copied into the JSON payloads, in output order.
_SEARCH_RESULT_FIELDS = (
    "url",
    "title",
    "author",
    "publish_time",
    "account_name",
    "account_id",
    "read_count",
    "like_count",
    "comment_count",
)
_FETCHED_ARTICLE_FIELDS = _SEARCH_RESULT_FIELDS + ("extracted_at",)


class WeChatArticleTools:
    """Tools for reading WeChat Official Account articles."""

    def __init__(self, browser_config, request_config):
        """
        Initialise the tool set.

        Args:
            browser_config: Browser configuration handed to ``get_browser_client``.
            request_config: Request configuration handed to ``get_browser_client``.
        """
        self.browser_config = browser_config
        self.request_config = request_config
        self.parser = WeChatArticleParser()

    @staticmethod
    def _failure_response(response_cls, request_id, failure_prefix, exc, unexpected=False):
        """
        Build a failed response of type ``response_cls`` describing ``exc``.

        Args:
            response_cls: Response class to instantiate; called with
                ``metadata=`` and ``error=`` keyword arguments.
            request_id: Identifier echoed in the metadata and the error payload.
            failure_prefix: User-facing prefix of the metadata message.
            exc: The exception that caused the failure.
            unexpected: When True, the error payload reports the failure as a
                ``ValidationError`` wrapping the original message, which is how
                unknown errors are surfaced to clients.

        Returns:
            An instance of ``response_cls`` with ``success=False`` metadata.
        """
        reported = ValidationError(f"未知错误: {str(exc)}") if unexpected else exc
        metadata = ResponseMetadata(
            request_id=request_id,
            success=False,
            message=f"{failure_prefix}: {str(exc)}",
        )
        return response_cls(
            metadata=metadata,
            error=create_error_response(
                error_code=reported.__class__.__name__,
                message=str(reported),
                request_id=request_id,
            ),
        )

    @staticmethod
    def _select_content(content, content_formats):
        """
        Return the entries of ``content`` matching ``content_formats``.

        Each requested format is looked up first as given and then, for enum
        members, by its ``.value``; ``extract_article_summary`` reads the same
        mapping by ``ContentFormat.TEXT.value``, so both spellings are accepted.
        When nothing matches (or no formats were requested) the whole mapping
        is returned unchanged.
        """
        selected = {}
        for fmt in content_formats or ():
            if fmt in content:
                selected[fmt] = content[fmt]
                continue
            value_key = getattr(fmt, "value", None)
            if value_key is not None and value_key in content:
                selected[value_key] = content[value_key]
        return selected or content

    async def fetch_article(self, request: FetchArticleRequest) -> FetchArticleResponse:
        """
        Fetch and parse a WeChat Official Account article.

        Args:
            request: Fetch request carrying ``url``, ``content_formats`` and an
                optional ``request_id``.

        Returns:
            A ``FetchArticleResponse``. Failures are not raised: they are
            reported through ``metadata.success=False`` and the ``error`` field.
        """
        request_id = request.request_id or generate_request_id()

        try:
            # Validate the URL before starting a browser session.
            if not self._is_valid_wechat_url(request.url):
                raise ValidationError(f"无效的微信公众号文章URL: {request.url}")

            # Download the page.
            async with get_browser_client(self.browser_config, self.request_config) as client:
                html_content = await client.fetch_wechat_article(request.url)

            # Parse it and keep only the requested content formats.
            article = self.parser.parse_article(html_content, request.url)
            filtered_content = self._select_content(article.content, request.content_formats)

            response_metadata = ResponseMetadata(
                request_id=request_id,
                success=True,
                message="文章获取成功",
            )
            return FetchArticleResponse(
                metadata=response_metadata,
                article=article,
                content=filtered_content,
            )

        except (BrowserError, NetworkError, ParseError, ValidationError) as e:
            logger.error("获取文章失败: %s", e)
            return self._failure_response(FetchArticleResponse, request_id, "获取文章失败", e)

        except Exception as e:
            # Boundary handler: keep the traceback in the log, answer with an error.
            logger.exception("获取文章时发生未知错误: %s", e)
            return self._failure_response(
                FetchArticleResponse, request_id, "获取文章失败", e, unexpected=True
            )

    async def search_articles(self, request: SearchArticlesRequest) -> SearchArticlesResponse:
        """
        Search WeChat Official Account articles.

        NOTE: this is only a skeleton. No search backend is wired in, so a
        valid query always yields an empty result list.

        Args:
            request: Search request wrapping a ``SearchQuery``.

        Returns:
            A ``SearchArticlesResponse``. Failures are reported through
            ``metadata.success=False`` and the ``error`` field.
        """
        request_id = request.query.request_id or generate_request_id()

        try:
            # Validate the query parameters.
            if not request.query.query:
                raise ValidationError("搜索关键词不能为空")

            # Searching WeChat articles may need an additional API or crawler;
            # until one exists the result list is simply empty.
            search_results = []

            response_metadata = ResponseMetadata(
                request_id=request_id,
                success=True,
                message="搜索完成",
            )
            return SearchArticlesResponse(
                metadata=response_metadata,
                results=search_results,
                total=len(search_results),
            )

        except ValidationError as e:
            logger.error("搜索文章失败: %s", e)
            return self._failure_response(SearchArticlesResponse, request_id, "搜索文章失败", e)

        except Exception as e:
            logger.exception("搜索文章时发生未知错误: %s", e)
            return self._failure_response(
                SearchArticlesResponse, request_id, "搜索文章失败", e, unexpected=True
            )

    async def extract_article_summary(
        self, request: ExtractArticleSummaryRequest
    ) -> ExtractArticleSummaryResponse:
        """
        Fetch an article and derive a short summary from its text content.

        Args:
            request: Summary request carrying ``url``, ``max_length`` and an
                optional ``request_id``.

        Returns:
            An ``ExtractArticleSummaryResponse``. Failures are reported through
            ``metadata.success=False`` and the ``error`` field.
        """
        request_id = request.request_id or generate_request_id()

        try:
            # Validate the URL.
            if not self._is_valid_wechat_url(request.url):
                raise ValidationError(f"无效的微信公众号文章URL: {request.url}")

            # Fetch the article, asking for the plain-text rendering only.
            fetch_request = FetchArticleRequest(
                url=request.url,
                content_formats=[ContentFormat.TEXT],
                request_id=request_id,
            )
            fetch_response = await self.fetch_article(fetch_request)

            if not fetch_response.metadata.success:
                # Tolerate a failed response that carries no error payload.
                error = fetch_response.error or {}
                raise ValidationError(f"获取文章失败: {error.get('message', '未知错误')}")

            # Extract the text and summarise it.
            text_content = fetch_response.content.get(ContentFormat.TEXT.value, "")
            summary = self._generate_summary(text_content, request.max_length)

            response_metadata = ResponseMetadata(
                request_id=request_id,
                success=True,
                message="摘要提取成功",
            )

            article_title = fetch_response.article.title if fetch_response.article else ""
            summary_obj = ArticleSummary(
                title=article_title,
                url=request.url,
                summary=summary,
            )
            return ExtractArticleSummaryResponse(
                metadata=response_metadata,
                summary=summary_obj,
            )

        except ValidationError as e:
            logger.error("提取文章摘要失败: %s", e)
            return self._failure_response(
                ExtractArticleSummaryResponse, request_id, "提取文章摘要失败", e
            )

        except Exception as e:
            logger.exception("提取文章摘要时发生未知错误: %s", e)
            return self._failure_response(
                ExtractArticleSummaryResponse, request_id, "提取文章摘要失败", e, unexpected=True
            )

    def _is_valid_wechat_url(self, url: str) -> bool:
        """
        Check whether ``url`` looks like a WeChat Official Account article URL.

        Accepted shapes are ``http(s)://mp.weixin.qq.com/s`` optionally followed
        by ``/...``, ``?...`` or ``#...``.

        Args:
            url: The URL to validate.

        Returns:
            True when the URL matches, False otherwise (including for empty or
            non-string input).
        """
        if not url or not isinstance(url, str):
            return False
        return _WECHAT_ARTICLE_URL_RE.match(url) is not None

    def _generate_summary(self, text: str, max_length: int = 200) -> str:
        """
        Produce a summary of ``text`` no longer than ``max_length`` characters.

        The summary is simply a prefix of the text. If a sentence terminator is
        found in the last 20% of the allowed window the text is cut right after
        it; otherwise the prefix is shortened to make room for a trailing
        ``"..."`` so the result still fits in ``max_length``.

        Args:
            text: The source text.
            max_length: Maximum summary length in characters. ``None`` falls
                back to 200; zero or negative values yield an empty summary.

        Returns:
            The summary string (empty when ``text`` is empty).
        """
        if not text:
            return ""
        if max_length is None:
            max_length = _DEFAULT_SUMMARY_LENGTH
        if max_length <= 0:
            return ""
        if len(text) <= max_length:
            return text

        window = text[:max_length]
        last_sentence_end = max(window.rfind(mark) for mark in _SENTENCE_ENDINGS)

        # Only cut on a sentence boundary when it does not discard too much text.
        if last_sentence_end > max_length * 0.8:
            return window[:last_sentence_end + 1]

        # Too short to hold any text besides the ellipsis: hard cut instead.
        if max_length <= len(_ELLIPSIS):
            return window
        return window[:max_length - len(_ELLIPSIS)] + _ELLIPSIS


# The MCP server instance itself is created in main.py.
async def handle_list_tools() -> List[types.Tool]:
    """Return the descriptors of the tools exposed by this module."""
    request_id_property = {
        "type": "string",
        "description": "请求ID(可选)"
    }
    return [
        types.Tool(
            name="fetch_article",
            description="获取微信公众号文章内容",
            inputSchema={
                "type": "object",
                "properties": {
                    "url": {
                        "type": "string",
                        "description": "微信公众号文章URL"
                    },
                    "content_formats": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": ["html", "markdown", "text"]
                        },
                        "description": "返回的内容格式",
                        "default": ["markdown", "text"]
                    },
                    "request_id": dict(request_id_property)
                },
                "required": ["url"]
            }
        ),
        types.Tool(
            name="search_articles",
            description="搜索微信公众号文章",
            inputSchema={
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "搜索关键词"
                    },
                    "account": {
                        "type": "string",
                        "description": "指定公众号名称(可选)"
                    },
                    "sort_by": {
                        "type": "string",
                        "enum": ["relevance", "time", "popularity"],
                        "description": "排序方式",
                        "default": "relevance"
                    },
                    "limit": {
                        "type": "integer",
                        "description": "返回结果数量限制",
                        "default": 10
                    },
                    "request_id": dict(request_id_property)
                },
                "required": ["query"]
            }
        ),
        types.Tool(
            name="extract_article_summary",
            description="提取微信公众号文章摘要",
            inputSchema={
                "type": "object",
                "properties": {
                    "url": {
                        "type": "string",
                        "description": "微信公众号文章URL"
                    },
                    "max_length": {
                        "type": "integer",
                        "description": "摘要最大长度",
                        "default": 200
                    },
                    "request_id": dict(request_id_property)
                },
                "required": ["url"]
            }
        )
    ]


def _json_result(payload: Dict[str, Any]) -> List[types.TextContent]:
    """Serialise ``payload`` as pretty-printed JSON inside one text content item."""
    return [types.TextContent(type="text", text=json.dumps(payload, ensure_ascii=False, indent=2))]


def _error_result(tool_name: str, error: Any) -> List[types.TextContent]:
    """Format ``error`` with ``format_mcp_tool_response`` and wrap its first text entry."""
    error_response = format_mcp_tool_response(
        tool_name=tool_name,
        error=error,
        response_format="json"
    )
    return [types.TextContent(type="text", text=error_response[0]["text"])]


def _article_fields(article: Any, field_names) -> Dict[str, Any]:
    """Copy the named attributes of ``article`` into a dict, preserving order."""
    return {field: getattr(article, field) for field in field_names}


async def _call_fetch_article(
    tools: WeChatArticleTools, arguments: Dict[str, Any]
) -> List[types.TextContent]:
    """Run the ``fetch_article`` tool from raw MCP arguments."""
    raw_formats = arguments.get("content_formats", ["markdown", "text"])
    if isinstance(raw_formats, str):
        # A bare string would otherwise be iterated character by character.
        raw_formats = [raw_formats]

    # Convert to enum members, skipping (and logging) unknown format names.
    format_enums = []
    for fmt in raw_formats or []:
        try:
            format_enums.append(ContentFormat(fmt))
        except ValueError:
            logger.warning("无效的内容格式: %s", fmt)

    request = FetchArticleRequest(
        url=arguments.get("url"),
        content_formats=format_enums,
        request_id=arguments.get("request_id")
    )
    response = await tools.fetch_article(request)

    if not response.metadata.success:
        return _error_result("fetch_article", response.error)

    return _json_result({
        "success": True,
        "request_id": response.metadata.request_id,
        "message": response.metadata.message,
        "article": _article_fields(response.article, _FETCHED_ARTICLE_FIELDS),
        "content": response.content
    })


async def _call_search_articles(
    tools: WeChatArticleTools, arguments: Dict[str, Any]
) -> List[types.TextContent]:
    """Run the ``search_articles`` tool from raw MCP arguments."""
    # Unknown sort orders fall back to relevance.
    try:
        sort_by_enum = SortOrder(arguments.get("sort_by", "relevance"))
    except ValueError:
        sort_by_enum = SortOrder.RELEVANCE

    query_request = SearchQuery(
        query=arguments.get("query"),
        account=arguments.get("account"),
        sort_by=sort_by_enum,
        limit=arguments.get("limit", 10),
        request_id=arguments.get("request_id")
    )
    response = await tools.search_articles(SearchArticlesRequest(query=query_request))

    if not response.metadata.success:
        return _error_result("search_articles", response.error)

    return _json_result({
        "success": True,
        "request_id": response.metadata.request_id,
        "message": response.metadata.message,
        "total": response.total,
        "results": [
            _article_fields(article, _SEARCH_RESULT_FIELDS)
            for article in response.results
        ]
    })


async def _call_extract_article_summary(
    tools: WeChatArticleTools, arguments: Dict[str, Any]
) -> List[types.TextContent]:
    """Run the ``extract_article_summary`` tool from raw MCP arguments."""
    request = ExtractArticleSummaryRequest(
        url=arguments.get("url"),
        max_length=arguments.get("max_length", 200),
        request_id=arguments.get("request_id")
    )
    response = await tools.extract_article_summary(request)

    if not response.metadata.success:
        return _error_result("extract_article_summary", response.error)

    return _json_result({
        "success": True,
        "request_id": response.metadata.request_id,
        "message": response.metadata.message,
        "summary": response.summary.to_dict() if response.summary else None
    })


# Tool name -> coroutine implementing it.
_TOOL_HANDLERS = {
    "fetch_article": _call_fetch_article,
    "search_articles": _call_search_articles,
    "extract_article_summary": _call_extract_article_summary,
}


async def handle_call_tool(name: str, arguments: Dict[str, Any]) -> List[types.TextContent]:
    """
    Dispatch an MCP tool call to the matching implementation.

    Args:
        name: Tool name; one of ``fetch_article``, ``search_articles`` or
            ``extract_article_summary``.
        arguments: Raw tool arguments; ``None`` is treated as an empty mapping.

    Returns:
        A single-item list of text content holding either the JSON result or a
        formatted error. Exceptions raised while running a tool are converted
        into a ``TOOL_EXECUTION_ERROR`` response rather than propagated.
    """
    try:
        handler = _TOOL_HANDLERS.get(name)
        if handler is None:
            # Unknown tool
            error = {
                "error": True,
                "error_type": "ValueError",
                "error_code": "UNKNOWN_TOOL",
                "message": f"未知工具: {name}"
            }
            return _error_result("unknown", error)

        # Default-constructed configuration (presumably picks up environment
        # overrides inside the config classes - confirm in ..models).
        browser_config = BrowserConfig()
        request_config = RequestConfig()
        tools = WeChatArticleTools(browser_config, request_config)

        return await handler(tools, arguments or {})

    except Exception as e:
        # Top-level boundary: log with traceback, answer with an error payload.
        logger.exception("工具执行失败: %s", e)
        error = {
            "error": True,
            "error_type": "RuntimeError",
            "error_code": "TOOL_EXECUTION_ERROR",
            "message": f"工具执行失败: {str(e)}"
        }
        return _error_result("unknown", error)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/whbfxy/MCP101Demo'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.