Skip to main content
Glama
crawler.py (3.26 kB)
import asyncio
import logging  # NOTE(review): retained for compatibility; error logging now goes through the shared `logger`.
from typing import Dict

from DrissionPage._functions.keys import Keys

from base import AbstractCrawler
from config import CSDN_LOC_TAG
from environment import get_chromium_browser_signal
from extension.crawler_factory import get_crawler_setup_source
from extension.csdn.client import CsdnClient
from utils import logger, github_proxy_url


class CsdnCrawler(AbstractCrawler):
    """Crawler that publishes a markdown article to CSDN through a Chromium tab.

    Browser interaction (DrissionPage) is synchronous, so the blocking tab work
    is pushed onto the executor supplied by ``get_chromium_browser_signal`` and
    awaited via ``loop.run_in_executor``.
    """

    def __init__(self):
        # Human-readable tag used as a prefix in every log line.
        self.type_crawler = "CSDN Crawler"
        # Cookie/domain scope for this crawler.
        self.domain_crawler = ".csdn.net"
        # Holds the CSDN URLs/locators plus the staged title and markdown body.
        self._csdnClient = CsdnClient()

    async def article_path_proc(self, file_name: str, md_content: str):
        """Strip GitHub proxy URL prefixes from *md_content* and stage the
        article (title + body) on the CSDN client.

        Args:
            file_name: Used verbatim as the article title.
            md_content: Markdown body; every prefix returned by
                ``github_proxy_url()`` is removed from embedded links.
        """
        for old_str in github_proxy_url():
            md_content = md_content.replace(old_str, '')
        self._csdnClient.title_name = file_name
        self._csdnClient.md_content = md_content

    async def init_config(self, file_name: str, md_content: str, image_results=None):
        """Prepare the article for publishing.

        ``image_results`` is accepted for interface parity with other crawlers
        but is not used by the CSDN flow.
        """
        logger.info(f"[{self.type_crawler}] Start initializing the article operation.")
        await self.article_path_proc(file_name, md_content)

    async def run(self):
        """Publish the staged article; returns the result dict produced by
        :meth:`tab_publish_actions` (``{'result': SUCCESS_RESULT|FAILURE_RESULT}``)."""
        logger.info(f'[{self.type_crawler}] Start publishing articles.')
        browser, executor = get_chromium_browser_signal()
        loop = asyncio.get_running_loop()
        # Blocking DrissionPage calls run off the event loop.
        return await loop.run_in_executor(executor, self.tab_publish_actions, browser)

    def tab_publish_actions(self, browser) -> Dict:
        """Drive the CSDN editor UI in a fresh tab: fill title/content, open
        the publish dialog, add the configured tag, and click publish.

        Returns:
            ``{'result': AbstractCrawler.SUCCESS_RESULT}`` on success, or
            ``{'result': AbstractCrawler.FAILURE_RESULT}`` on any exception.
        """
        tab = browser.new_tab()
        try:
            tab.get(self._csdnClient.edit_url)
            # Fill in the title and the markdown body.
            tab.actions \
                .click(on_ele=tab.ele(self._csdnClient.loc_title)).input(self._csdnClient.title_name) \
                .click(on_ele=tab.ele(self._csdnClient.loc_content)).input(self._csdnClient.md_content).wait(0.25)
            tab.wait.load_start()
            # Open the publish dialog, type the tag, confirm with ENTER,
            # close the tag picker, then press the final publish button.
            tab.actions \
                .click(on_ele=tab.ele(self._csdnClient.loc_send_button)).wait(0.25) \
                .move_to(ele_or_loc=tab.ele(self._csdnClient.loc_add_tag)).wait(0.25) \
                .click(on_ele=tab.ele(self._csdnClient.loc_tag_input)).input(CSDN_LOC_TAG).wait(1) \
                .key_down(Keys.ENTER).wait(0.25) \
                .click(on_ele=tab.ele(self._csdnClient.loc_close_button)) \
                .click(on_ele=tab.ele(self._csdnClient.loc_publish_button))
            tab.wait.load_start()
            return {'result': AbstractCrawler.SUCCESS_RESULT}
        except Exception as e:
            # Fix: was `logging.error` (root logger); use the module logger
            # for consistency with the rest of this class.
            logger.error(f'[{self.type_crawler}] Failure to publish the article! Cause of error:{e}')
            return {'result': AbstractCrawler.FAILURE_RESULT}
        finally:
            # Always close the tab, success or failure.
            tab.close()

    async def login_as(self):
        """Verify the CSDN login state without blocking the event loop."""
        browser, executor = get_chromium_browser_signal()
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(executor, self.login_as_sync, browser)

    def login_as_sync(self, browser):
        """Open the login-verification URL and record whether we are logged in.

        If the browser was redirected to the login page, the final ``tab.url``
        equals ``login_url`` and the "csdn" flag is set to ``False``.
        """
        tab = browser.new_tab()
        try:
            tab.get(self._csdnClient.verify_login_url)
            tab.wait.load_start()
            get_crawler_setup_source().update({"csdn": tab.url != self._csdnClient.login_url})
        except Exception as e:
            logger.error(f'[{self.type_crawler}] Login page failed to validate! Cause of error:{e}')
        finally:
            tab.close()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Cyanty/Arcs-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.