query_table
by wukan1986
- query_table
import subprocess
import time
from pathlib import Path
from typing import Optional, Tuple
import pandas as pd
from loguru import logger
from playwright.async_api import async_playwright, Playwright, Browser, BrowserContext, Page
from query_table.enums import QueryType, Site
async def launch_browser(playwright: Optional[Playwright] = None,
port: int = 9222,
browser_path: Optional[str] = None) -> Tuple[Playwright, Browser, BrowserContext, Page]:
r"""启动浏览器,并连接CDP协议
Parameters
----------
playwright:
同步playwright对象。设成None时,会自动启动playwright对象。
port:int
浏览器调试端口
browser_path
浏览器可执行路径。推荐使用chrome,因为Microsoft Edge必须在任务管理器中完全退出才能启动调试端口
Returns
-------
playwright, browser, context, page
References
----------
https://blog.csdn.net/qq_30576521/article/details/142370538
"""
if playwright is None:
playwright = await async_playwright().start()
try:
# 尝试连接已打开的浏览器
browser = await playwright.chromium.connect_over_cdp(f"http://127.0.0.1:{port}", slow_mo=1000, timeout=5000)
except:
if browser_path is None:
browser_path = r'C:\Program Files\Google\Chrome\Application\chrome.exe'
if not Path(browser_path).exists():
# Microsoft Edge必须在任务管理器中完全退出才能启动调试端口
browser_path = r'C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe'
if not Path(browser_path).exists():
raise ValueError("未找到浏览器可执行文件")
# 执行完成后不会关闭浏览器
command = f'"{browser_path}" --remote-debugging-port={port} --start-maximized'
logger.info(f"start browser:{command}")
subprocess.Popen(command, shell=True)
time.sleep(3)
try:
browser = await playwright.chromium.connect_over_cdp(f"http://127.0.0.1:{port}", slow_mo=1000, timeout=5000)
except:
logger.warning("是否提前打开了浏览器,但未开启远程调试端口?请关闭浏览器全部进程后重试")
raise
context = browser.contexts[0]
page = context.pages[0]
return playwright, browser, context, page
async def query(page: Page,
query_input: str = "收盘价>100元",
query_type: QueryType = QueryType.CNStock,
max_page: int = 5,
site: Site = Site.THS) -> pd.DataFrame:
"""查询表格
Parameters
----------
page : playwright.sync_api.Page
页面
query_input : str, optional
查询条件, by default "收盘价>100元"
query_type : QueryType, optional
查询类型, by default QueryType.astock
max_page : int, optional
最大页数, by default 5
site : Site, optional
站点, by default Site.iwencai
Returns
-------
pd.DataFrame
查询结果
"""
if site == Site.EastMoney:
from query_table.sites.eastmoney import query
return await query(page, query_input, query_type, max_page)
if site == Site.THS:
from query_table.sites.iwencai import query
return await query(page, query_input, query_type, max_page)
if site == Site.TDX:
from query_table.sites.tdx import query
return await query(page, query_input, query_type, max_page)
raise ValueError(f"未支持的站点:{site}")