"""
浏览器管理器
负责 Playwright 浏览器实例的创建、配置和生命周期管理。
"""
import asyncio
from typing import Optional, Dict, Any
from pathlib import Path
from playwright.async_api import async_playwright, Browser, BrowserContext, Page
from playwright_stealth import Stealth
from loguru import logger
from ..storage.cookie_storage import CookieStorage
class BrowserManager:
"""浏览器管理器"""
def __init__(
self,
headless: bool = False,
browser_type: str = "chromium",
user_data_dir: Optional[Path] = None,
cookie_storage: Optional[CookieStorage] = None,
executable_path: Optional[str] = None
):
"""
初始化浏览器管理器
Args:
headless: 是否无头模式
browser_type: 浏览器类型 (chromium, firefox, webkit)
user_data_dir: 用户数据目录
cookie_storage: Cookie 存储实例
executable_path: 浏览器可执行文件路径(用于使用本地浏览器)
如果未设置,则从配置读取或使用 Playwright 自带的浏览器
"""
self.headless = headless
self.browser_type = browser_type
self.user_data_dir = user_data_dir
self.cookie_storage = cookie_storage or CookieStorage()
# 如果未指定 executable_path,尝试从配置读取
if executable_path is None:
from ..config.settings import Settings
executable_path = Settings.BROWSER_EXECUTABLE_PATH
self.executable_path = executable_path
self._playwright = None
self._browser: Optional[Browser] = None
self._context: Optional[BrowserContext] = None
self._page: Optional[Page] = None
def is_started(self) -> bool:
"""检查浏览器是否已启动"""
return self._playwright is not None and self._browser is not None
def is_valid(self) -> bool:
"""检查浏览器是否仍然有效(未关闭)"""
try:
if not self.is_started():
return False
# 检查浏览器是否已关闭
if self._browser and hasattr(self._browser, 'is_connected'):
return self._browser.is_connected()
# 检查页面是否有效
if self._page and hasattr(self._page, 'is_closed'):
return not self._page.is_closed()
return True
except Exception:
return False
async def ensure_started(self) -> None:
"""确保浏览器已启动且有效,如果无效则重启"""
if not self.is_valid():
logger.warning("浏览器无效或已关闭,正在重启...")
await self.restart()
async def restart(self, load_cookies: bool = True) -> None:
"""
重启浏览器
Args:
load_cookies: 是否在重启后加载cookies,默认为True
"""
logger.info("重启浏览器")
await self.stop()
# 临时保存原始的cookie_storage,以便在不加载cookies时使用空的存储
original_cookie_storage = None
if not load_cookies:
original_cookie_storage = self.cookie_storage
# 创建一个临时的空cookie存储
from ..storage.cookie_storage import CookieStorage
self.cookie_storage = CookieStorage(cookie_path="/tmp/empty_cookies.json")
await self.start()
# 恢复原始的cookie_storage
if original_cookie_storage:
self.cookie_storage = original_cookie_storage
logger.info("浏览器重启完成")
async def start(self) -> None:
"""启动浏览器"""
if self._playwright is not None:
logger.warning("浏览器已经启动")
return
logger.info(f"启动浏览器 (headless={self.headless}, type={self.browser_type})")
if self.executable_path:
logger.info(f"使用本地浏览器: {self.executable_path}")
self._playwright = await async_playwright().start()
# 获取浏览器类型
browser_launcher = getattr(self._playwright, self.browser_type)
# 浏览器启动参数
from ..config import BrowserConfig
launch_options = {
"headless": self.headless,
"args": BrowserConfig.BROWSER_ARGS
}
# 如果指定了浏览器可执行文件路径,使用本地浏览器
if self.executable_path:
launch_options["executable_path"] = self.executable_path
# 如果指定了用户数据目录
if self.user_data_dir:
launch_options["user_data_dir"] = str(self.user_data_dir)
self._browser = await browser_launcher.launch(**launch_options)
# 创建浏览器上下文 - 使用默认配置
context_options = {
"viewport": {"width": BrowserConfig.VIEWPORT_WIDTH, "height": BrowserConfig.VIEWPORT_HEIGHT},
"user_agent": BrowserConfig.USER_AGENT,
"java_script_enabled": True,
"accept_downloads": True,
"ignore_https_errors": True,
}
self._context = await self._browser.new_context(**context_options)
# 加载 cookies
await self._load_cookies()
# 创建页面
self._page = await self._context.new_page()
# 应用反检测脚本
await self._apply_stealth(self._page)
logger.info("浏览器启动成功")
async def stop(self, save_cookies: bool = True) -> None:
"""
停止浏览器
Args:
save_cookies: 是否保存cookies,默认为True
"""
if not self.is_started():
return
logger.info("停止浏览器")
# 根据参数决定是否保存 cookies
if save_cookies:
await self._save_cookies()
if self._page:
await self._page.close()
self._page = None
if self._context:
await self._context.close()
self._context = None
if self._browser:
await self._browser.close()
self._browser = None
if self._playwright:
await self._playwright.stop()
self._playwright = None
logger.info("浏览器已停止")
async def get_page(self) -> Page:
"""获取页面实例"""
await self.ensure_started()
if self._page is None:
await self.start()
return self._page
async def new_page(self) -> Page:
"""创建新页面"""
if self._context is None:
await self.start()
page = await self._context.new_page()
# 应用反检测脚本
await self._apply_stealth(page)
return page
async def load_cookies(self) -> None:
"""加载 cookies(公共方法)"""
await self._load_cookies()
async def save_cookies(self) -> bool:
"""保存 cookies(公共方法)"""
return await self._save_cookies()
async def clear_all_data(self) -> bool:
"""
清除浏览器的所有数据(cookies、缓存、本地存储等)
Returns:
是否清除成功
"""
if not self.is_started():
logger.warning("浏览器未启动,无法清除数据")
return False
try:
# 清除浏览器上下文中的所有数据
if self._context:
# 清除 cookies
await self._context.clear_cookies()
# 清除本地存储和会话存储
if self._page:
# 清除 localStorage
await self._page.evaluate("() => { localStorage.clear(); }")
# 清除 sessionStorage
await self._page.evaluate("() => { sessionStorage.clear(); }")
# 清除 indexedDB
await self._page.evaluate("""
() => {
if (window.indexedDB) {
return new Promise((resolve) => {
const databases = indexedDB.databases ? indexedDB.databases() : Promise.resolve([]);
databases.then(dbs => {
const deletePromises = dbs.map(db => {
return new Promise((deleteResolve) => {
const deleteReq = indexedDB.deleteDatabase(db.name);
deleteReq.onsuccess = () => deleteResolve();
deleteReq.onerror = () => deleteResolve();
});
});
Promise.all(deletePromises).then(() => resolve());
}).catch(() => resolve());
});
}
}
""")
# 清除缓存(如果支持)
try:
await self._context.clear_permissions()
except Exception as e:
logger.debug(f"清除权限失败(可能不支持): {e}")
# 清除本地 cookie 文件
if self.cookie_storage:
self.cookie_storage.clear_cookies()
logger.info("已清除浏览器的所有数据")
return True
except Exception as e:
logger.error(f"清除浏览器数据失败: {e}")
return False
async def _save_cookies(self) -> bool:
"""保存 cookies"""
if self._context is None:
return False
try:
cookies = await self._context.cookies()
ok = await self.cookie_storage.save_cookies(cookies)
logger.info(f"保存了 {len(cookies)} 个 cookies")
return ok
except Exception as e:
logger.error(f"保存 cookies 失败: {e}")
return False
async def _load_cookies(self) -> None:
"""加载 cookies"""
if self._context is None:
return
try:
cookies = await self.cookie_storage.load_cookies()
if cookies:
await self._context.add_cookies(cookies)
logger.info(f"加载了 {len(cookies)} 个 cookies")
except Exception as e:
logger.warning(f"加载 cookies 失败: {e}")
async def __aenter__(self):
"""异步上下文管理器入口"""
await self.start()
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""异步上下文管理器出口"""
await self.stop()
async def cleanup(self) -> None:
"""清理浏览器资源(别名方法)"""
await self.stop()
async def _apply_stealth(self, page: Page) -> None:
"""
应用 playwright-stealth 反检测脚本到页面
Args:
page: Playwright 页面实例
"""
try:
stealth = Stealth()
await stealth.apply_stealth_async(page)
logger.debug("已应用 playwright-stealth 反检测脚本")
except Exception as e:
logger.warning(f"应用反检测脚本失败: {e},继续执行")