Skip to main content
Glama
Skynotdie

MCP Localization Project

by Skynotdie
web_search_antibot.py (7.66 kB)
#!/usr/bin/env python3
"""
Web Search Anti-Bot - advanced anti-bot bypass manager.

Provides intelligent bot-detection bypass and adaptive request management.
"""

import importlib.util
import logging
import random
import time
from typing import Dict, List

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

from web_search_base import SearchEngine

logger = logging.getLogger(__name__)


def _brotli_available() -> bool:
    """Return True when a Brotli decoder module is importable."""
    for module_name in ('brotlicffi', 'brotli'):
        try:
            if importlib.util.find_spec(module_name) is not None:
                return True
        except (ImportError, ValueError):
            # find_spec can raise for a half-initialised module; treat it as
            # "not available" rather than failing at import time.
            continue
    return False


# Only advertise Brotli when a decoder is installed. Setting Accept-Encoding
# explicitly overrides the HTTP client's own default, so claiming "br" without
# a decoder risks receiving a body that is never decompressed.
_ACCEPT_ENCODING = 'gzip, deflate, br' if _brotli_available() else 'gzip, deflate'


class EnhancedAntiBotManager:
    """Advanced anti-bot bypass manager.

    Builds per-engine request headers, computes randomized delays that adapt
    to the recorded success rate, and caches one ``requests.Session`` per
    search engine.
    """

    def __init__(self) -> None:
        """Initialise the User-Agent pools and empty bookkeeping containers."""
        self.user_agents = self._initialize_user_agents()
        self.proxies = []
        self.request_history = []
        # "<engine.value>:<domain>" -> exponential moving average of success.
        self.success_rates = {}
        # "<engine.value>:<domain>" -> time.time() of the last recorded update.
        self.last_request_times = {}

        # Adaptive delay system
        self.base_delays = [0.5, 1.0, 1.5, 2.0, 3.0, 5.0]
        self.adaptive_delays = {}

        # Session management
        # engine.value -> cached requests.Session
        self.sessions = {}
        # engine.value -> cookies copied into a newly created session
        self.session_cookies = {}

    def _initialize_user_agents(self) -> Dict[str, List[str]]:
        """Return the User-Agent strings grouped by platform key."""
        return {
            'desktop_chrome': [
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
                'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
            ],
            'desktop_firefox': [
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0',
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/119.0',
                'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0'
            ],
            'mobile': [
                'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
                'Mozilla/5.0 (Linux; Android 13; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Mobile Safari/537.36'
            ]
        }

    def get_optimized_headers(self, engine: SearchEngine, platform: str = 'desktop_chrome') -> Dict[str, str]:
        """Build request headers tailored to a search engine.

        Args:
            engine: Search engine the request targets; selects the
                engine-specific header overrides.
            platform: Key into ``self.user_agents``. An unknown key, or a key
                whose pool is empty, falls back to ``'desktop_chrome'``.

        Returns:
            A new header dict containing the common headers, a randomly chosen
            User-Agent, and any engine-specific overrides.
        """
        base_headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
            'Accept-Encoding': _ACCEPT_ENCODING,
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'cross-site',
            'Cache-Control': 'max-age=0',
            'DNT': '1'
        }

        # Pick a User-Agent; `or` also covers an empty pool, which would
        # otherwise make random.choice raise IndexError.
        candidates = self.user_agents.get(platform) or self.user_agents['desktop_chrome']
        base_headers['User-Agent'] = random.choice(candidates)

        # Engine-specific headers
        if engine == SearchEngine.GOOGLE:
            # NOTE(review): these client hints always describe Chrome on
            # Windows, regardless of which User-Agent was chosen above
            # (Firefox, macOS, Linux or mobile) - confirm this is intended.
            base_headers.update({
                'Sec-Fetch-Site': 'none',
                'Sec-Ch-Ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
                'Sec-Ch-Ua-Mobile': '?0',
                'Sec-Ch-Ua-Platform': '"Windows"'
            })
        elif engine == SearchEngine.BING:
            base_headers.update({
                'Referer': 'https://www.bing.com/',
                'X-Edge-Shopping-Flag': '1'
            })
        elif engine == SearchEngine.DUCKDUCKGO:
            base_headers.update({
                'Referer': 'https://duckduckgo.com/',
                'X-Requested-With': 'XMLHttpRequest'
            })

        return base_headers

    def calculate_adaptive_delay(self, engine: SearchEngine, domain: str) -> float:
        """Compute a randomized delay in seconds for the next request.

        Starts from a random entry of ``self.base_delays``, doubles it when
        the recorded success rate is below 0.7 or scales it by 0.7 when above
        0.9, adds 2-5 seconds if the last recorded request for this key was
        under 5 seconds ago, then applies a jitter of -30% to +50%.

        Args:
            engine: Search engine; its ``value`` forms part of the lookup key.
            domain: Domain part of the lookup key.

        Returns:
            The delay, never less than 0.1.
        """
        key = f"{engine.value}:{domain}"

        # Base delay
        base_delay = random.choice(self.base_delays)

        # Adjust according to the recorded success rate
        success_rate = self.success_rates.get(key)
        if success_rate is not None:
            if success_rate < 0.7:
                # Low success rate: back off harder
                base_delay *= 2.0
            elif success_rate > 0.9:
                # High success rate: shorten the wait
                base_delay *= 0.7

        # Take the interval since the most recent request into account
        last_request_time = self.last_request_times.get(key)
        if last_request_time is not None:
            elapsed = time.time() - last_request_time
            if elapsed < 5:
                # The previous request was very recent: add extra delay
                base_delay += random.uniform(2, 5)

        # Add random jitter
        jitter = random.uniform(-0.3, 0.5) * base_delay
        return max(0.1, base_delay + jitter)

    def update_success_rate(self, engine: SearchEngine, domain: str, success: bool) -> None:
        """Record the outcome of a request.

        The first outcome for a key sets the rate to 1.0 or 0.0; later
        outcomes are folded in as an exponential moving average with
        alpha = 0.1. The key's last-request timestamp is set to the current
        time on every call.

        Args:
            engine: Search engine; its ``value`` forms part of the key.
            domain: Domain part of the key.
            success: Whether the request succeeded.
        """
        key = f"{engine.value}:{domain}"
        outcome = 1.0 if success else 0.0

        if key not in self.success_rates:
            self.success_rates[key] = outcome
        else:
            # Update with an exponential moving average
            alpha = 0.1
            self.success_rates[key] = alpha * outcome + (1 - alpha) * self.success_rates[key]

        self.last_request_times[key] = time.time()

    def get_session(self, engine: SearchEngine) -> requests.Session:
        """Return the cached session for a search engine, creating it if needed.

        A new session gets a retry-enabled adapter mounted for both http and
        https, and any cookies stored in ``self.session_cookies`` for this
        engine. The session headers are refreshed on every call.

        Args:
            engine: Search engine; its ``value`` is the cache key.

        Returns:
            The ``requests.Session`` associated with ``engine``.
        """
        session_key = engine.value

        if session_key not in self.sessions:
            session = requests.Session()

            # Retry strategy (tuned per search engine)
            if engine == SearchEngine.GOOGLE:
                retry_strategy = Retry(
                    total=3,
                    backoff_factor=2,
                    status_forcelist=[429, 500, 502, 503, 504, 403],
                    respect_retry_after_header=True
                )
            else:
                retry_strategy = Retry(
                    total=2,
                    backoff_factor=1.5,
                    status_forcelist=[429, 500, 502, 503, 504],
                )

            adapter = HTTPAdapter(max_retries=retry_strategy, pool_maxsize=10)
            session.mount("http://", adapter)
            session.mount("https://", adapter)

            # Restore cookies stored for this session key
            if session_key in self.session_cookies:
                session.cookies.update(self.session_cookies[session_key])

            self.sessions[session_key] = session

        # Refresh headers (on every request)
        session = self.sessions[session_key]
        session.headers.update(self.get_optimized_headers(engine))

        return session

    def _get_engine_domain(self, engine: SearchEngine) -> str:
        """Return the domain for a search engine, or 'unknown.com' if unmapped."""
        domain_map = {
            SearchEngine.GOOGLE: 'google.com',
            SearchEngine.BING: 'bing.com',
            SearchEngine.DUCKDUCKGO: 'duckduckgo.com',
            SearchEngine.YANDEX: 'yandex.com',
            SearchEngine.BAIDU: 'baidu.com'
        }
        return domain_map.get(engine, 'unknown.com')

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Skynotdie/mky'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.