Arcs-MCP

Overview Schema Related Servers Score Discussions

crawler.py•7.34 KiB

# -*- coding: utf-8 -*-
import asyncio
import json
import logging
import re
from typing import Optional, List, Dict, Any

from DrissionPage._functions.keys import Keys
from DrissionPage._pages.mix_tab import MixTab

from base import AbstractCrawler
from config import WECHAT_PUBLIC_ACCOUNT, WECHAT_AUTHOR, WECHAT_MARKDOWN2HTML, WECHAT_ARTICLE_PUBLISH_MODE, \
    WECHAT_COVER_IMAGE
from environment import get_chromium_browser_signal
from extension.crawler_factory import get_crawler_setup_source
from extension.wechat.client import WeChatClient
from utils import request, logger, pyperclip_paste

# Matches the trailing file name of an image URL (optionally followed by a
# query string). Compiled once because it is reused for every uploaded image.
_IMAGE_FILENAME_RE = re.compile(r'[^/]+\.(png|jpg|jpeg|gif|bmp|svg|webp)(?=\?|$)')


class WeChatCrawler(AbstractCrawler):
    """Crawler that converts a Markdown article to HTML and publishes it to WeChat.

    The flow is: ``init_config`` fetches an access token, uploads the article
    images and prepares the draft payload; ``run`` submits the draft and,
    depending on ``WECHAT_ARTICLE_PUBLISH_MODE``, publishes it.
    """

    def __init__(self):
        self.type_crawler = "WeChat Crawler"
        self.domain_crawler = ".weixin.qq.com"
        self._weChatClient = WeChatClient()

    async def article_path_proc(self, file_name: str, md_content: str):
        """Build the draft payload for an article.

        The Markdown is converted to HTML by driving a browser tab in the
        executor returned by ``get_chromium_browser_signal`` so the blocking
        browser automation does not stall the event loop.

        Args:
            file_name: Used as the article title.
            md_content: Markdown source of the article.

        Returns:
            The payload dict, or ``None`` when the conversion produced no HTML.
        """
        # NOTE: the former in-process markdown2 conversion is obsolete; the
        # conversion is done through the md2html web page instead.
        browser, executor = get_chromium_browser_signal()
        loop = asyncio.get_running_loop()
        html_content = await loop.run_in_executor(executor, self.tab_md2html_actions, browser, md_content)
        if not html_content:
            return None
        return {
            'TITLE': file_name,
            'AUTHOR': WECHAT_AUTHOR,  # custom author name
            'DIGEST': None,
            'CONTENT': html_content,
            'CONTENT_SOURCE_URL': None,
            'THUMB_MEDIA_ID': WECHAT_COVER_IMAGE,  # article cover (must be a permanent material id)
            'X1_Y1_X2_Y2': None,
        }

    async def init_config(self, file_name: str, md_content: str, image_results: Optional[List[Dict[str, Any]]]):
        """Fetch an access token, upload images and prepare the draft payload.

        Every successfully uploaded image has its original URL replaced by the
        uploaded URL inside ``md_content`` before the Markdown is converted.

        Args:
            file_name: Used as the article title.
            md_content: Markdown source of the article.
            image_results: Image descriptors passed on to ``image_process``; may be ``None``.

        Raises:
            KeyError: If the token response does not contain ``access_token``.
        """
        logger.info(f"[{self.type_crawler}] Start initializing and processing image links.")
        status_code, access_token_json = await request(method="POST",
                                                       url="https://api.weixin.qq.com/cgi-bin/stable_token",
                                                       json_data=WECHAT_PUBLIC_ACCOUNT,
                                                       timeout=10)
        self._weChatClient.access_token = access_token_json["access_token"]
        results = await self.image_process(image_results)
        if results:
            for image_path in results:
                # Failed uploads yield None and are skipped.
                if image_path and "old_image_url" in image_path and "new_image_url" in image_path:
                    md_content = md_content.replace(image_path["old_image_url"], image_path["new_image_url"])
        self._weChatClient.pre_json_data = await self.article_path_proc(file_name, md_content)

    async def run(self):
        """Submit the prepared draft and publish it according to the publish mode.

        Mode ``"1"`` stops after the draft was created, mode ``"2"`` posts to
        ``publish_url`` and mode ``"3"`` posts to ``mp_publish_url``. Any other
        mode is reported as a failure.

        Returns:
            ``{'result': AbstractCrawler.SUCCESS_RESULT}`` on success, otherwise
            ``{'result': AbstractCrawler.FAILURE_RESULT}``.
        """
        logger.info(f'[{self.type_crawler}] Start publishing articles.')
        client = self._weChatClient
        json_data = json.dumps(client.pre_json_data, ensure_ascii=False).encode('utf-8')
        status_code, result_json = await request("POST", url=client.pre_publish_url, content=json_data,
                                                 headers=client.headers, timeout=10)

        # A 2xx response may still carry only an error payload, so look the
        # media id up defensively instead of subscripting.
        media_id = result_json.get("media_id") if result_json else None
        if not (200 <= status_code < 300) or media_id is None:
            # Without a draft there is nothing to publish, whatever the mode.
            logger.error(f"[{self.type_crawler}] Failure to pre-publish the article! Cause of error:{str(result_json)}")
            return {'result': AbstractCrawler.FAILURE_RESULT}

        client.json_data = media_id
        client.mp_json_data = media_id

        if WECHAT_ARTICLE_PUBLISH_MODE == "1":
            return {'result': AbstractCrawler.SUCCESS_RESULT}

        if WECHAT_ARTICLE_PUBLISH_MODE == "2":
            status_code, result_json = await self.request_post(client.publish_url, client.json_data)
        elif WECHAT_ARTICLE_PUBLISH_MODE == "3":
            status_code, result_json = await self.request_post(client.mp_publish_url, client.mp_json_data)
        else:
            logger.error(f"[{self.type_crawler}] Unsupported publish mode: {WECHAT_ARTICLE_PUBLISH_MODE!r}")
            return {'result': AbstractCrawler.FAILURE_RESULT}

        if 200 <= status_code < 300 and result_json and result_json.get("errcode") == 0:
            return {'result': AbstractCrawler.SUCCESS_RESULT}
        logger.error(f"[{self.type_crawler}] Failure to publish the article! Cause of error:{str(result_json)}")
        return {'result': AbstractCrawler.FAILURE_RESULT}

    async def request_post(self, url_type: str, json_data_type: Optional[Dict]):
        """POST ``json_data_type`` to ``url_type`` with the client's headers.

        Returns:
            Whatever ``request`` returns; callers unpack it as
            ``(status_code, result_json)``.
        """
        return await request("POST", url=url_type, json_data=json_data_type, headers=self._weChatClient.headers,
                             timeout=10)

    async def image_process(self, image_results):
        """Upload all given images concurrently.

        Args:
            image_results: Iterable of image descriptors; falsy entries are skipped.

        Returns:
            A list with one ``image_upload`` result per non-falsy entry, or
            ``None`` when ``image_results`` is empty or ``None``.
        """
        if not image_results:
            return None
        uploads = [self.image_upload(image_result) for image_result in image_results if image_result]
        return await asyncio.gather(*uploads)

    async def image_upload(self, image_result):
        """Upload a single image and return the URL mapping.

        Args:
            image_result: Mapping with ``image_url`` and ``image_content`` keys.

        Returns:
            ``{"old_image_url": ..., "new_image_url": ...}`` on success, or
            ``None`` when the URL has no recognised image file name or the
            upload failed.
        """
        match = _IMAGE_FILENAME_RE.search(image_result["image_url"])
        if not match:
            return None

        # Strip only the final extension so dotted names such as "a.b.jpg"
        # keep their full stem.
        stem = match.group(0).rsplit(".", 1)[0]
        files = {
            # The image is always submitted with a .png name and image/png type.
            "media": (stem + ".png", image_result["image_content"], 'image/png'),
        }
        status_code, response_json = await request(method="POST",
                                                   url=self._weChatClient.uploadimg_url,
                                                   files=files,
                                                   timeout=10)
        new_url = response_json.get("url") if response_json else None
        if 200 <= status_code < 300 and new_url:
            return {"old_image_url": image_result["image_url"], "new_image_url": new_url}

        # Report the failure but let the remaining uploads continue; the
        # caller ignores None results.
        logger.error(f"[{self.type_crawler}] Failure to upload the image! Cause of error:{str(response_json)}")
        return None

    def tab_md2html_actions(self, browser, md_content):
        """Convert Markdown to HTML by driving the md2html page in a new tab.

        This is blocking browser automation and is run through
        ``run_in_executor`` by ``article_path_proc``.

        Args:
            browser: Browser object providing ``new_tab()``.
            md_content: Markdown text typed into the page's editor.

        Returns:
            The value returned by ``pyperclip_paste`` (presumably the HTML read
            from the clipboard; confirm in ``utils``), or ``None`` on error.
        """
        tab: MixTab = browser.new_tab()
        try:
            tab.get(WECHAT_MARKDOWN2HTML)  # navigate to the md2html conversion page
            tab.refresh()
            # Focus the editor, clear its current content, then enter the Markdown.
            tab.actions \
                .click(on_ele=tab.ele(self._weChatClient.loc_code_mirror_scroll_editor)) \
                .type(Keys.CTRL_A).key_down(Keys.BACKSPACE) \
                .input(md_content).wait(0.15)
            tab.wait.load_start()

            def paste_adapt():
                # Clicks the page's copy control; handed to pyperclip_paste as its post action.
                return tab.actions.click(on_ele=tab.ele(self._weChatClient.loc_nice_sidebar_wechat_copy))

            return pyperclip_paste(post_action=paste_adapt)
        except Exception as e:
            logger.error(f'[{self.type_crawler}] Failure to md2html conversion of the article! Cause of error:{e}')
            return None
        finally:
            # Always release the tab, whether or not the conversion succeeded.
            tab.close()

    async def login_as(self):
        """Probe the token endpoint and record whether WeChat credentials work.

        Stores ``True`` under the ``"wechat"`` key of the crawler setup source
        when the response contains ``access_token``, ``False`` otherwise.
        Errors are logged and leave the setup source untouched.
        """
        try:
            status_code, access_token_json = await request(method="POST",
                                                           url="https://api.weixin.qq.com/cgi-bin/stable_token",
                                                           json_data=WECHAT_PUBLIC_ACCOUNT,
                                                           timeout=3)
            get_crawler_setup_source().update({"wechat": "access_token" in access_token_json})
        except Exception as e:
            logger.error(f'[{self.type_crawler}] Failure to login as the account! Cause of error:{e}')

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Cyanty/Arcs-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

crawler.py•7.34 KiB