Skip to main content
Glama
Skynotdie

MCP Localization Project

by Skynotdie
web_search_playwright.py (7.94 kB)
#!/usr/bin/env python3
"""
Web Search Playwright - Playwright-based browser automation.

Provides an advanced anti-bot bypass system and stealth browser automation.
"""

import asyncio
import time
import random
import logging
from typing import Dict, Tuple

from web_search_base import SearchEngine

logger = logging.getLogger(__name__)


class PlaywrightManager:
    """Playwright manager with anti-bot evasion helpers.

    Holds one Playwright driver, one Chromium browser and one browser
    context. ``initialize`` creates them, ``search_with_playwright`` opens a
    page per call inside the shared context, and ``cleanup`` releases
    everything.
    """

    # Desktop Chrome user agents; one is picked at random per context.
    _USER_AGENTS = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
    )

    def __init__(self):
        self.playwright = None
        self.browser = None
        self.context = None
        self.is_initialized = False

    async def initialize(self, headless: bool = True, stealth: bool = True) -> None:
        """Start Playwright, launch Chromium and create the browser context.

        Failures are logged, not raised; callers must check
        ``self.is_initialized`` afterwards.

        Args:
            headless: Launch the browser without a visible window.
            stealth: Install the automation-hiding init script on the context.
        """
        try:
            from playwright.async_api import async_playwright
        except ImportError:
            logger.warning("Playwright가 설치되지 않음. pip install playwright 실행 필요")
            self.is_initialized = False
            return

        try:
            self.playwright = await async_playwright().start()

            # Browser launch options.
            # NOTE(review): '--disable-web-security' turns off the same-origin
            # policy for every page this browser loads; confirm it is needed.
            # The placeholder '--disable-extensions-except=/path/to/extension'
            # flag was dropped: it pointed at a non-existent path.
            launch_options = {
                'headless': headless,
                'args': [
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-web-security',
                    '--disable-blink-features=AutomationControlled',
                    '--disable-extensions',
                    '--no-first-run',
                    '--no-default-browser-check',
                    '--disable-default-apps',
                    '--disable-popup-blocking',
                ],
            }

            self.browser = await self.playwright.chromium.launch(**launch_options)

            # Create the context (stealth mode).
            context_options = {
                'user_agent': self._get_realistic_user_agent(),
                'viewport': {'width': 1920, 'height': 1080},
                'locale': 'ko-KR',
                'timezone_id': 'Asia/Seoul',
                'extra_http_headers': {
                    'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
                    'Accept-Encoding': 'gzip, deflate, br',
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                    'Connection': 'keep-alive',
                    'Upgrade-Insecure-Requests': '1',
                    'Sec-Fetch-Dest': 'document',
                    'Sec-Fetch-Mode': 'navigate',
                    'Sec-Fetch-Site': 'none',
                    'Cache-Control': 'max-age=0',
                },
            }

            self.context = await self.browser.new_context(**context_options)

            if stealth:
                await self._apply_stealth_mode()

            self.is_initialized = True
            logger.info("Playwright 초기화 완료")

        except Exception as e:
            logger.error(f"Playwright 초기화 실패: {e}")
            # Do not leave a half-started driver/browser behind.
            await self._release_resources()

    def _get_realistic_user_agent(self) -> str:
        """Return a randomly chosen desktop Chrome User-Agent string."""
        return random.choice(self._USER_AGENTS)

    async def _apply_stealth_mode(self) -> None:
        """Register an init script on the context that masks automation hints.

        Best effort: a failure is logged as a warning and not propagated.
        """
        try:
            # JavaScript that hides WebDriver detection signals.
            # `permissions.query` is bound to its owner object: calling the
            # native method unbound throws "Illegal invocation" for every
            # permission other than 'notifications'.
            stealth_script = """
            // webdriver 속성 숨기기
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined,
            });

            // Chrome 객체 모킹
            window.chrome = {
                runtime: {},
                loadTimes: function() {},
                csi: function() {},
                app: {}
            };

            // Permissions API 우회
            const originalQuery = window.navigator.permissions.query.bind(window.navigator.permissions);
            window.navigator.permissions.query = (parameters) => (
                parameters.name === 'notifications' ?
                    Promise.resolve({ state: Notification.permission }) :
                    originalQuery(parameters)
            );

            // Plugin 배열 수정
            Object.defineProperty(navigator, 'plugins', {
                get: () => [1, 2, 3, 4, 5],
            });

            // Languages 수정
            Object.defineProperty(navigator, 'languages', {
                get: () => ['ko-KR', 'ko', 'en-US', 'en'],
            });
            """

            await self.context.add_init_script(stealth_script)
            logger.debug("스텔스 모드 적용 완료")

        except Exception as e:
            logger.warning(f"스텔스 모드 적용 실패: {e}")

    async def search_with_playwright(self, url: str, selectors: Dict[str, str]) -> Tuple[str, Dict]:
        """Load ``url`` in a fresh page and return its HTML plus metadata.

        Initializes the manager on first use. The page is always closed
        before returning or raising.

        Args:
            url: Address to navigate to.
            selectors: Accepted for interface compatibility; this method does
                not read it.

        Returns:
            ``(html, metadata)`` where ``metadata`` holds ``title``, ``url``,
            ``viewport``, ``load_time`` (a ``time.time()`` timestamp taken
            after the content was captured, not a duration), the last 10
            ``console_logs`` and the last 20 ``network_requests``.

        Raises:
            RuntimeError: If Playwright could not be initialized.
            Exception: Any navigation/evaluation error, logged and re-raised.
        """
        if not self.is_initialized:
            await self.initialize()

        if not self.is_initialized:
            raise RuntimeError("Playwright 초기화 실패")

        page = await self.context.new_page()
        console_logs = []
        network_requests = []

        try:
            # Monitor console output and network requests. The listeners must
            # be attached before navigation, otherwise nothing is recorded.
            page.on('console', lambda msg: console_logs.append({
                'type': msg.type,
                'text': msg.text,
                'location': msg.location,
            }))
            page.on('request', lambda request: network_requests.append({
                'url': request.url,
                'method': request.method,
                'headers': dict(request.headers),
            }))

            # Wait for the page to load.
            await page.goto(url, wait_until='networkidle')

            # Random delay (imitates human behaviour).
            await asyncio.sleep(random.uniform(1, 3))

            # Scroll simulation.
            await page.evaluate("window.scrollTo(0, document.body.scrollHeight / 4)")
            await asyncio.sleep(random.uniform(0.5, 1.5))

            # Extract the HTML.
            html_content = await page.content()

            # Collect additional data. `viewport_size` is a property in the
            # Python API, so it is read rather than awaited/called.
            metadata = {
                'title': await page.title(),
                'url': page.url,
                'viewport': page.viewport_size,
                'load_time': time.time(),
            }
            metadata['console_logs'] = console_logs[-10:]  # last 10 only
            metadata['network_requests'] = network_requests[-20:]  # last 20 only

            return html_content, metadata

        except Exception as e:
            logger.error(f"Playwright 검색 실패: {e}")
            raise
        finally:
            await page.close()

    async def _release_resources(self) -> bool:
        """Close context, browser and driver independently.

        Each handle is cleared before it is closed so a repeated call never
        closes the same object twice, and a failure in one step does not
        prevent the remaining steps from running.

        Returns:
            True if every close/stop call succeeded (or there was nothing to
            release), False if at least one of them raised.
        """
        steps = (
            ('context', 'close'),
            ('browser', 'close'),
            ('playwright', 'stop'),
        )
        all_ok = True
        for attr_name, method_name in steps:
            resource = getattr(self, attr_name)
            if resource is None:
                continue
            setattr(self, attr_name, None)
            try:
                await getattr(resource, method_name)()
            except Exception as e:
                all_ok = False
                logger.error(f"Playwright 정리 실패: {e}")
        self.is_initialized = False
        return all_ok

    async def cleanup(self) -> None:
        """Release all Playwright resources; errors are logged, not raised."""
        if await self._release_resources():
            logger.info("Playwright 정리 완료")

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Skynotdie/mky'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.