Skip to main content
Glama
page_controller.py8.62 kB
""" 页面控制器 提供页面操作的高级封装,包括导航、元素查找、等待等功能。 """ import asyncio from typing import Optional, Union, List, Dict, Any from playwright.async_api import Page, Locator, TimeoutError as PlaywrightTimeoutError from loguru import logger class PageController: """页面控制器""" def __init__(self, page: Page): """ 初始化页面控制器 Args: page: Playwright 页面实例 """ self.page = page from ..config import BrowserConfig self.default_timeout = BrowserConfig.ELEMENT_TIMEOUT async def navigate(self, url: str, wait_until: str = "domcontentloaded") -> None: """ 导航到指定URL Args: url: 目标URL wait_until: 等待条件 """ logger.info(f"导航到: {url}") try: # 检查页面是否仍然有效 if self.page.is_closed(): logger.error("页面已关闭,无法导航") raise Exception("页面已关闭,需要重新初始化浏览器") await self.page.goto(url, wait_until=wait_until, timeout=self.default_timeout) logger.info("页面加载完成") except PlaywrightTimeoutError: logger.error(f"导航超时: {url}") raise except Exception as e: error_msg = str(e) if "Target page, context or browser has been closed" in error_msg or "page.is_closed" in error_msg: logger.error("浏览器或页面已关闭,需要重新初始化") raise Exception("浏览器已关闭,需要重新初始化") logger.error(f"导航失败: {e}") raise def is_page_valid(self) -> bool: """检查页面是否仍然有效""" try: return not self.page.is_closed() except Exception: return False async def wait_for_element( self, selector: str, timeout: Optional[int] = None, state: str = "visible" ) -> Locator: """ 等待元素出现 Args: selector: CSS选择器 timeout: 超时时间(毫秒) state: 元素状态 (visible, hidden, attached, detached) Returns: 元素定位器 """ timeout = timeout or self.default_timeout logger.debug(f"等待元素: {selector} (state={state})") try: locator = self.page.locator(selector) await locator.wait_for(state=state, timeout=timeout) return locator except PlaywrightTimeoutError: logger.error(f"等待元素超时: {selector}") raise async def has_element(self, selector: str, timeout: int = 5000) -> bool: """ 检查元素是否存在 Args: selector: CSS选择器 timeout: 超时时间(毫秒) Returns: 元素是否存在 """ try: await self.wait_for_element(selector, timeout=timeout) return True except PlaywrightTimeoutError: return False async def click_element( self, selector: str, timeout: Optional[int] = None, force: bool = False ) -> None: """ 点击元素 Args: selector: CSS选择器 timeout: 超时时间(毫秒) force: 是否强制点击 """ logger.debug(f"点击元素: {selector}") locator = await self.wait_for_element(selector, timeout) await locator.click(force=force) async def input_text( self, selector: str, text: str, timeout: Optional[int] = None, clear: bool = True ) -> None: """ 输入文本 Args: selector: CSS选择器 text: 输入的文本 timeout: 超时时间(毫秒) clear: 是否先清空输入框 """ logger.debug(f"输入文本到: {selector}") locator = await self.wait_for_element(selector, timeout) if clear: await locator.clear() await locator.fill(text) async def get_text(self, selector: str, timeout: Optional[int] = None) -> str: """ 获取元素文本 Args: selector: CSS选择器 timeout: 超时时间(毫秒) Returns: 元素文本内容 """ locator = await self.wait_for_element(selector, timeout) return await locator.text_content() or "" async def get_attribute( self, selector: str, attribute: str, timeout: Optional[int] = None ) -> Optional[str]: """ 获取元素属性 Args: selector: CSS选择器 attribute: 属性名 timeout: 超时时间(毫秒) Returns: 属性值 """ locator = await self.wait_for_element(selector, timeout) return await locator.get_attribute(attribute) async def wait_for_stable(self, timeout: int = 5000) -> None: """ 等待页面稳定 Args: timeout: 超时时间(毫秒) """ logger.debug("等待页面稳定") try: # 首先等待网络空闲 await self.page.wait_for_load_state("networkidle", timeout=timeout) logger.debug("页面网络空闲状态达成") except PlaywrightTimeoutError: logger.warning("等待页面网络空闲超时,尝试其他等待策略") try: # 如果网络空闲失败,尝试等待DOM内容加载完成 await self.page.wait_for_load_state("domcontentloaded", timeout=min(timeout, 5000)) logger.debug("页面DOM内容加载完成") # 额外等待一小段时间让页面渲染完成 await asyncio.sleep(1) logger.debug("页面额外等待完成") except PlaywrightTimeoutError: logger.warning("所有页面稳定性等待策略都超时") # 不抛出异常,让调用方决定如何处理 async def scroll_to_element(self, selector: str, timeout: Optional[int] = None) -> None: """ 滚动到元素位置 Args: selector: CSS选择器 timeout: 超时时间(毫秒) """ locator = await self.wait_for_element(selector, timeout) await locator.scroll_into_view_if_needed() async def take_screenshot(self, path: Optional[str] = None) -> bytes: """ 截图 Args: path: 保存路径(可选) Returns: 截图数据 """ logger.debug(f"截图: {path or '内存'}") return await self.page.screenshot(path=path, full_page=True) async def evaluate_script(self, script: str) -> Any: """ 执行JavaScript脚本 Args: script: JavaScript代码 Returns: 脚本执行结果 """ logger.debug(f"执行脚本: {script[:100]}...") return await self.page.evaluate(script) async def wait_for_navigation(self, timeout: Optional[int] = None) -> None: """ 等待页面导航完成 Args: timeout: 超时时间(毫秒) """ timeout = timeout or self.default_timeout logger.debug("等待页面导航") try: await self.page.wait_for_load_state("domcontentloaded", timeout=timeout) except PlaywrightTimeoutError: logger.error("等待页面导航超时") raise async def get_cookies(self) -> List[Dict[str, Any]]: """ 获取当前页面的cookies Returns: Cookie列表 """ context = self.page.context return await context.cookies() async def set_cookies(self, cookies: List[Dict[str, Any]]) -> None: """ 设置cookies Args: cookies: Cookie列表 """ context = self.page.context await context.add_cookies(cookies)

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/luyike221/xiaohongshu-mcp-python'

If you have feedback or need assistance with the MCP directory API, please join our Discord server