#!/usr/bin/env python3
"""
Web Search Playwright - Playwright 기반 브라우저 자동화
고급 안티봇 우회 시스템과 스텔스 브라우저 자동화를 제공합니다.
"""
import asyncio
import time
import random
import logging
from typing import Dict, Tuple
from web_search_base import SearchEngine
logger = logging.getLogger(__name__)
class PlaywrightManager:
    """Manage one Playwright Chromium browser and one stealth-configured context.

    The manager owns three handles (driver, browser, context) that are created
    by :meth:`initialize` and released by :meth:`cleanup`. Pages are opened per
    search on the shared context and closed when the search finishes.
    """

    # (min, max) seconds to pause after navigation before touching the page.
    PAGE_SETTLE_DELAY = (1.0, 3.0)
    # (min, max) seconds to pause after the simulated scroll.
    SCROLL_DELAY = (0.5, 1.5)

    def __init__(self):
        # Handle returned by ``async_playwright().start()``; None until initialized.
        self.playwright = None
        # Chromium browser launched from ``self.playwright``.
        self.browser = None
        # Browser context that every search page is opened on.
        self.context = None
        # True only after ``initialize`` completed without error.
        self.is_initialized = False

    async def initialize(self, headless: bool = True, stealth: bool = True):
        """Start Playwright, launch Chromium and create the browser context.

        Failures are logged rather than raised; callers check
        ``self.is_initialized`` afterwards. Anything that was started before a
        failure is shut down again so no browser process is left behind.

        Args:
            headless: Passed to ``chromium.launch`` as the ``headless`` option.
            stealth: When true, install the detection-evasion init script on
                the new context.
        """
        try:
            # Imported lazily so a missing package is reported as a warning
            # instead of breaking the import of this module.
            from playwright.async_api import async_playwright

            self.playwright = await async_playwright().start()

            # Browser launch options.
            launch_options = {
                'headless': headless,
                'args': [
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    # NOTE(review): disables the same-origin policy for every
                    # page loaded in this browser; kept for compatibility.
                    '--disable-web-security',
                    '--disable-blink-features=AutomationControlled',
                    # The former '--disable-extensions-except=/path/to/extension'
                    # entry was a placeholder path and contradicted the flag
                    # below, so it has been dropped.
                    '--disable-extensions',
                    '--no-first-run',
                    '--no-default-browser-check',
                    '--disable-default-apps',
                    '--disable-popup-blocking',
                ],
            }
            self.browser = await self.playwright.chromium.launch(**launch_options)

            # Context options (Korean locale / Seoul timezone, desktop viewport).
            context_options = {
                'user_agent': self._get_realistic_user_agent(),
                'viewport': {'width': 1920, 'height': 1080},
                'locale': 'ko-KR',
                'timezone_id': 'Asia/Seoul',
                'extra_http_headers': {
                    'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
                    'Accept-Encoding': 'gzip, deflate, br',
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                    'Connection': 'keep-alive',
                    'Upgrade-Insecure-Requests': '1',
                    'Sec-Fetch-Dest': 'document',
                    'Sec-Fetch-Mode': 'navigate',
                    'Sec-Fetch-Site': 'none',
                    'Cache-Control': 'max-age=0',
                },
            }
            self.context = await self.browser.new_context(**context_options)

            if stealth:
                await self._apply_stealth_mode()

            self.is_initialized = True
            logger.info("Playwright 초기화 완료")
        except ImportError:
            logger.warning("Playwright가 설치되지 않음. pip install playwright 실행 필요")
            await self._close_resources()
        except Exception as e:
            logger.error("Playwright 초기화 실패: %s", e)
            # Release a driver/browser that started before the failure.
            await self._close_resources()

    def _get_realistic_user_agent(self) -> str:
        """Return one of a fixed set of desktop Chrome User-Agent strings, chosen at random."""
        agents = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        ]
        return random.choice(agents)

    async def _apply_stealth_mode(self):
        """Register an init script on the context that masks automation signals.

        The script overrides ``navigator.webdriver``, ``window.chrome``,
        ``navigator.permissions.query``, ``navigator.plugins`` and
        ``navigator.languages``. Failures are logged as warnings and do not
        abort initialization.
        """
        try:
            # The native permissions.query must stay bound to
            # navigator.permissions; calling it detached throws
            # "Illegal invocation" for every non-notification query.
            stealth_script = """
            // webdriver 속성 숨기기
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined,
            });
            // Chrome 객체 모킹
            window.chrome = {
                runtime: {},
                loadTimes: function() {},
                csi: function() {},
                app: {}
            };
            // Permissions API 우회
            const originalQuery = window.navigator.permissions.query.bind(window.navigator.permissions);
            window.navigator.permissions.query = (parameters) => (
                parameters.name === 'notifications' ?
                Promise.resolve({ state: Notification.permission }) :
                originalQuery(parameters)
            );
            // Plugin 배열 수정
            Object.defineProperty(navigator, 'plugins', {
                get: () => [1, 2, 3, 4, 5],
            });
            // Languages 수정
            Object.defineProperty(navigator, 'languages', {
                get: () => ['ko-KR', 'ko', 'en-US', 'en'],
            });
            """
            await self.context.add_init_script(stealth_script)
            logger.debug("스텔스 모드 적용 완료")
        except Exception as e:
            logger.warning("스텔스 모드 적용 실패: %s", e)

    async def search_with_playwright(self, url: str, selectors: Dict[str, str]) -> Tuple[str, Dict]:
        """Load ``url`` in a fresh page and return its HTML plus page metadata.

        Args:
            url: Address to navigate to.
            selectors: Not read by this method; accepted so existing callers
                keep working.

        Returns:
            ``(html, metadata)`` where ``metadata`` holds ``title``, ``url``,
            ``viewport``, ``load_time`` (epoch timestamp taken after
            extraction, not a duration), ``console_logs`` (last 10) and
            ``network_requests`` (last 20).

        Raises:
            RuntimeError: If Playwright could not be initialized.
            Exception: Any error raised while driving the page is logged and
                re-raised.
        """
        if not self.is_initialized:
            await self.initialize()
        if not self.is_initialized:
            raise RuntimeError("Playwright 초기화 실패")

        page = await self.context.new_page()
        console_logs = []
        network_requests = []

        def record_console(msg):
            console_logs.append({
                'type': msg.type,
                'text': msg.text,
                'location': msg.location,
            })

        def record_request(request):
            network_requests.append({
                'url': request.url,
                'method': request.method,
                'headers': dict(request.headers),
            })

        try:
            # Listeners must be attached before navigation; events emitted
            # while the page loads are otherwise never observed.
            page.on('console', record_console)
            page.on('request', record_request)

            await page.goto(url, wait_until='networkidle')

            # Random pause to imitate a human reader.
            await asyncio.sleep(random.uniform(*self.PAGE_SETTLE_DELAY))

            # Simulate a partial scroll down the page.
            await page.evaluate("window.scrollTo(0, document.body.scrollHeight / 4)")
            await asyncio.sleep(random.uniform(*self.SCROLL_DELAY))

            html_content = await page.content()

            metadata = {
                'title': await page.title(),
                'url': page.url,
                # ``viewport_size`` is a property in the Python API, not a coroutine.
                'viewport': page.viewport_size,
                'load_time': time.time(),
                'console_logs': console_logs[-10:],  # keep the 10 most recent
                'network_requests': network_requests[-20:],  # keep the 20 most recent
            }
            return html_content, metadata
        except Exception as e:
            logger.error("Playwright 검색 실패: %s", e)
            raise
        finally:
            await page.close()

    async def _close_resources(self) -> bool:
        """Close context, browser and driver independently of one another.

        Each handle is reset to None before its close call so a repeated
        invocation never touches an already-closed object, and a failure in
        one close does not prevent the others from running.

        Returns:
            True when every held resource closed without error.
        """
        all_closed = True
        for attr, closer in (('context', 'close'), ('browser', 'close'), ('playwright', 'stop')):
            resource = getattr(self, attr)
            if not resource:
                continue
            setattr(self, attr, None)
            try:
                await getattr(resource, closer)()
            except Exception as e:
                all_closed = False
                logger.error("Playwright 정리 실패: %s", e)
        self.is_initialized = False
        return all_closed

    async def cleanup(self):
        """Release the context, browser and Playwright driver.

        Never raises: close errors are logged. Safe to call repeatedly.
        """
        if await self._close_resources():
            logger.info("Playwright 정리 완료")