Skip to main content
Glama
hmumixaM

USCardForum MCP Server

by hmumixaM
cloudflare.py19 kB
"""Cloudflare bypass utilities.

Provides functions and constants for bypassing Cloudflare protection
using multiple strategies in order:
1. Playwright with stealth (real browser - most effective)
2. curl_cffi (real browser TLS fingerprints)
3. cloudscraper (multiple browser profiles)
"""
from __future__ import annotations

import logging
import subprocess
import sys
import time
from typing import Any

import cloudscraper
import requests

logger = logging.getLogger(__name__)

# Track if we've already tried to install Playwright browsers
_playwright_browsers_installed = False

# Cloudflare-related status codes that might be worth retrying
CLOUDFLARE_RETRY_CODES = {403, 429, 503, 520, 521, 522, 523, 524}

# Browser profiles for cloudscraper
BROWSER_PROFILES = [
    {"browser": "chrome", "platform": "linux", "desktop": True},
    {"browser": "chrome", "platform": "windows", "desktop": True},
    {"browser": "chrome", "platform": "darwin", "desktop": True},
    {"browser": "firefox", "platform": "linux", "desktop": True},
    {"browser": "firefox", "platform": "windows", "desktop": True},
]

# curl_cffi browser impersonation profiles (TLS fingerprints)
CURL_CFFI_IMPERSONATES = [
    "chrome120",
    "chrome119",
    "chrome110",
    "chrome107",
    "chrome104",
    "edge101",
    "safari15_5",
]

# Common headers that make requests look more like a real browser
BROWSER_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Cache-Control": "max-age=0",
}


def _close_quietly(session: Any) -> None:
    """Close *session* if it is not None, ignoring errors raised by ``close()``.

    Used to release sessions that failed a bypass probe so their connections
    are not left open while the next strategy is tried.
    """
    if session is None:
        return
    try:
        session.close()
    except Exception as e:
        logger.debug(f"Ignoring error while closing session: {e}")


def _ensure_playwright_browsers() -> bool:
    """Ensure Playwright browsers are installed.

    Attempts to install Chromium if not already installed. Once a launch or
    an installation has succeeded, the result is cached in the module-level
    ``_playwright_browsers_installed`` flag and later calls return immediately.

    Returns:
        True if browsers are available, False otherwise
    """
    global _playwright_browsers_installed

    if _playwright_browsers_installed:
        return True

    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        logger.warning("Playwright package not installed")
        return False

    # Try to launch browser to check if it's installed
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            browser.close()
        _playwright_browsers_installed = True
        logger.info("Playwright browsers already installed")
        return True
    except Exception as e:
        logger.info(f"Playwright browsers not installed, attempting installation: {e}")

    # Run the installer through the current interpreter so it works even when
    # the environment's script directory is not on PATH (e.g. a virtualenv
    # that was never activated). Fall back to the bare command only if the
    # interpreter path is unknown.
    if sys.executable:
        install_cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
    else:
        install_cmd = ["playwright", "install", "chromium"]

    # Try to install browsers
    try:
        logger.info("Installing Playwright Chromium browser...")
        result = subprocess.run(
            install_cmd,
            capture_output=True,
            text=True,
            timeout=300,  # 5 minute timeout
        )
        if result.returncode == 0:
            logger.info("Playwright Chromium installed successfully")
            _playwright_browsers_installed = True
            return True
        logger.warning(f"Playwright install failed: {result.stderr}")
        return False
    except subprocess.TimeoutExpired:
        logger.warning("Playwright installation timed out")
        return False
    except FileNotFoundError:
        logger.warning("playwright command not found")
        return False
    except Exception as e:
        logger.warning(f"Failed to install Playwright browsers: {e}")
        return False


def create_cloudflare_session(
    delay: int = 3,
    browser: str = "chrome",
    platform: str = "linux",
) -> requests.Session:
    """Create a cloudscraper session configured for Cloudflare bypass.

    Args:
        delay: Delay in seconds for challenge solving (default: 3)
        browser: Browser to emulate (chrome, firefox)
        platform: Platform to emulate (linux, windows, darwin)

    Returns:
        A cloudscraper session with ``BROWSER_HEADERS`` applied
    """
    session = cloudscraper.create_scraper(
        browser={"browser": browser, "platform": platform, "desktop": True},
        delay=delay,
    )
    session.headers.update(BROWSER_HEADERS)
    return session


class CurlCffiSessionWrapper:
    """Wrapper to make curl_cffi.Session behave like requests.Session.

    This allows curl_cffi to be used as a drop-in replacement. Every request
    gets a default 15 second timeout and the wrapper's ``headers`` merged
    underneath any per-request headers.
    """

    def __init__(self, session: Any = None, impersonate: str = "chrome120"):
        """Initialize wrapper with existing session or create new one.

        Args:
            session: Existing curl_cffi Session to wrap (reuses cookies)
            impersonate: Browser to impersonate if creating new session
        """
        if session is not None:
            self._session = session
        else:
            from curl_cffi.requests import Session

            self._session = Session(impersonate=impersonate)
        self._impersonate = impersonate
        # Copy so that edits to this wrapper's headers never touch the
        # module-level BROWSER_HEADERS constant.
        self.headers = dict(BROWSER_HEADERS)
        self.cookies = self._session.cookies

    def _prepare_kwargs(self, kwargs: dict[str, Any]) -> dict[str, Any]:
        """Apply the default timeout and merge session headers into *kwargs*.

        Per-request headers override the wrapper's headers. ``headers=None``
        is treated the same as omitting the argument instead of raising
        ``TypeError``.
        """
        kwargs.setdefault("timeout", 15)
        merged = dict(self.headers)
        merged.update(kwargs.get("headers") or {})
        kwargs["headers"] = merged
        return kwargs

    def get(self, url: str, **kwargs) -> Any:
        """Make GET request."""
        return self._session.get(url, **self._prepare_kwargs(kwargs))

    def post(self, url: str, **kwargs) -> Any:
        """Make POST request."""
        return self._session.post(url, **self._prepare_kwargs(kwargs))

    def request(self, method: str, url: str, **kwargs) -> Any:
        """Make any HTTP request."""
        return self._session.request(method, url, **self._prepare_kwargs(kwargs))


def _create_session_with_curl_cffi(
    base_url: str,
    timeout_seconds: float = 15.0,
) -> Any | None:
    """Create a session using curl_cffi with browser TLS fingerprints.

    curl_cffi impersonates real browser TLS fingerprints, which is very
    effective against Cloudflare's TLS fingerprinting. Each entry of
    ``CURL_CFFI_IMPERSONATES`` is probed in order with a GET of the site
    root; sessions that do not get a 200 are closed before the next attempt.

    Args:
        base_url: The base URL to test against
        timeout_seconds: Timeout for test requests

    Returns:
        A curl_cffi session wrapper (reusing the session that passed),
        or None if failed
    """
    try:
        from curl_cffi.requests import Session
    except ImportError:
        logger.warning("curl_cffi not available, skipping TLS fingerprint bypass")
        return None

    base_url = base_url.rstrip("/")

    for impersonate in CURL_CFFI_IMPERSONATES:
        session = None
        try:
            session = Session(impersonate=impersonate)
            # Test if this impersonation works
            test_resp = session.get(
                f"{base_url}/",
                timeout=timeout_seconds,
                allow_redirects=True,
            )
            if test_resp.status_code == 200:
                logger.info(f"Cloudflare bypass successful with curl_cffi impersonate={impersonate}")
                # IMPORTANT: Return wrapper with the SAME session that passed (keeps cookies!)
                return CurlCffiSessionWrapper(session=session, impersonate=impersonate)
            if test_resp.status_code == 403:
                logger.warning(f"curl_cffi {impersonate} got 403, trying next...")
            else:
                logger.warning(
                    f"curl_cffi {impersonate} got status {test_resp.status_code}, trying next..."
                )
        except Exception as e:
            logger.warning(f"curl_cffi {impersonate} failed: {e}, trying next...")
        # Only reached when this impersonation did not pass: release it.
        _close_quietly(session)

    logger.warning("All curl_cffi impersonations failed")
    return None


def _create_session_with_playwright(
    base_url: str,
    timeout_seconds: float = 30.0,
    headless: bool = True,
) -> requests.Session | None:
    """Create a requests session using cookies obtained from Playwright with stealth.

    Uses a real browser to solve Cloudflare challenges and transfers cookies
    (and the browser's user agent) to a requests session. Will attempt to
    install browsers if not present. The resulting session is verified with a
    GET of the site root and only returned when that request gets a 200.

    Args:
        base_url: The base URL to navigate to
        timeout_seconds: Timeout for page load and for the verification request
        headless: Run browser in headless mode (default: True)

    Returns:
        A requests.Session with Cloudflare cookies, or None if failed
    """
    # Ensure browsers are installed (this also verifies imports work)
    if not _ensure_playwright_browsers():
        logger.warning("Playwright browsers not available")
        return None

    base_url = base_url.rstrip("/")
    session = requests.Session()
    session.headers.update(BROWSER_HEADERS)

    try:
        # Import here after ensuring browsers are installed
        from playwright.sync_api import sync_playwright
        from playwright_stealth import Stealth

        stealth = Stealth()

        with sync_playwright() as p:
            # Launch browser with stealth settings
            browser = p.chromium.launch(
                headless=headless,
                args=[
                    "--disable-blink-features=AutomationControlled",
                    "--disable-dev-shm-usage",
                    "--no-sandbox",
                    "--disable-setuid-sandbox",
                    "--disable-infobars",
                    "--window-size=1920,1080",
                    "--start-maximized",
                ],
            )
            try:
                # Create context with realistic viewport and user agent
                context = browser.new_context(
                    viewport={"width": 1920, "height": 1080},
                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                    locale="en-US",
                    timezone_id="America/New_York",
                )
                page = context.new_page()

                # Apply stealth to avoid detection
                stealth.apply_stealth_sync(page)

                # Navigate to the site and wait for Cloudflare challenge to complete
                logger.info(f"Playwright: Navigating to {base_url}")
                page.goto(base_url, wait_until="networkidle", timeout=timeout_seconds * 1000)

                # Wait additional time for any JS challenges
                page.wait_for_timeout(3000)

                # Check if we got past Cloudflare
                content = page.content().lower()
                if "cloudflare" in content and ("challenge" in content or "checking" in content):
                    # Wait longer for challenge to complete
                    logger.info("Playwright: Waiting for Cloudflare challenge...")
                    page.wait_for_timeout(5000)

                # Get cookies from browser
                cookies = context.cookies()
                logger.info(f"Playwright: Got {len(cookies)} cookies")

                # Transfer cookies to requests session
                for cookie in cookies:
                    session.cookies.set(
                        cookie["name"],
                        cookie["value"],
                        domain=cookie.get("domain", ""),
                        path=cookie.get("path", "/"),
                    )

                # Get the user agent used
                user_agent = page.evaluate("navigator.userAgent")
                session.headers["User-Agent"] = user_agent
            finally:
                # Close the browser even when navigation or cookie
                # extraction raised.
                browser.close()

        # Test if session works
        test_resp = session.get(
            f"{base_url}/",
            timeout=timeout_seconds,
            allow_redirects=True,
        )
        if test_resp.status_code == 200:
            logger.info("Playwright: Session created successfully")
            return session

        logger.warning(f"Playwright: Test request got status {test_resp.status_code}")
        _close_quietly(session)
        return None

    except ImportError as e:
        logger.warning(f"Playwright import failed: {e}")
        return None
    except Exception as e:
        logger.error(f"Playwright fallback failed: {e}")
        _close_quietly(session)
        return None


def create_cloudflare_session_with_fallback(
    base_url: str,
    timeout_seconds: float = 15.0,
    use_playwright: bool = True,
    use_curl_cffi: bool = True,
    use_cloudscraper: bool = True,
) -> Any:
    """Create a session with Cloudflare bypass, trying multiple strategies.

    Order of attempts (Playwright first as most effective):
    1. Playwright with stealth (real browser - best success rate)
    2. curl_cffi with real browser TLS fingerprints
    3. cloudscraper with different browser profiles

    If every enabled strategy fails, a basic cloudscraper session is still
    returned (unverified), so this function never returns None.

    Args:
        base_url: The base URL to test against
        timeout_seconds: Timeout for test requests (the Playwright attempt
            always uses a fixed 30 second timeout)
        use_playwright: Try Playwright first (default: True)
        use_curl_cffi: Try curl_cffi (default: True)
        use_cloudscraper: Try cloudscraper (default: True)

    Returns:
        A session configured to bypass Cloudflare
    """
    base_url = base_url.rstrip("/")

    # Strategy 1: Try Playwright with stealth (most effective)
    if use_playwright:
        logger.info("Trying Playwright with stealth (most effective)...")
        playwright_session = _create_session_with_playwright(
            base_url,
            timeout_seconds=30.0,
            headless=True,
        )
        if playwright_session is not None:
            return playwright_session
        logger.warning("Playwright failed, trying next strategy...")

    # Strategy 2: Try curl_cffi with TLS fingerprinting
    if use_curl_cffi:
        logger.info("Trying curl_cffi with TLS fingerprints...")
        curl_session = _create_session_with_curl_cffi(base_url, timeout_seconds)
        if curl_session is not None:
            return curl_session
        logger.warning("curl_cffi failed, trying next strategy...")

    # Strategy 3: Try cloudscraper with different profiles
    if use_cloudscraper:
        logger.info("Trying cloudscraper profiles...")
        for profile in BROWSER_PROFILES:
            session = None
            try:
                session = cloudscraper.create_scraper(
                    browser=profile,
                    delay=3,
                )
                session.headers.update(BROWSER_HEADERS)
                test_resp = session.get(
                    f"{base_url}/",
                    timeout=timeout_seconds,
                    allow_redirects=True,
                )
                if test_resp.status_code == 200:
                    logger.info(f"Cloudflare bypass successful with cloudscraper profile: {profile}")
                    return session
                if test_resp.status_code == 403:
                    logger.warning(f"Cloudscraper profile {profile} got 403, trying next...")
                else:
                    logger.warning(
                        f"Cloudscraper profile {profile} got status {test_resp.status_code}, trying next..."
                    )
            except Exception as e:
                logger.warning(f"Cloudscraper profile {profile} failed: {e}, trying next...")
            # Only reached when this profile did not pass: release it.
            _close_quietly(session)

    # Ultimate fallback: return cloudscraper session even if it didn't work
    logger.warning("All bypass methods failed, using basic cloudscraper as fallback")
    return create_cloudflare_session(delay=5)


def warm_up_session(
    session: Any,
    base_url: str,
    timeout_seconds: float = 15.0,
    with_delay: bool = True,
) -> bool:
    """Warm up a session to obtain Cloudflare cookies.

    Requests the site root and ``/about`` in turn. Request errors are logged
    and do not abort the remaining requests.

    Args:
        session: The session to warm up
        base_url: Base URL of the site
        timeout_seconds: Request timeout
        with_delay: Sleep 0.5 seconds after each completed request except
            the last

    Returns:
        True if at least one warm-up request succeeded
    """
    base_url = base_url.rstrip("/")
    warmup_urls = [
        f"{base_url}/",
        f"{base_url}/about",
    ]
    last_index = len(warmup_urls) - 1

    success = False
    for i, url in enumerate(warmup_urls):
        try:
            resp = session.get(
                url,
                timeout=timeout_seconds,
                allow_redirects=True,
            )
            if resp.status_code == 200:
                success = True
                logger.debug(f"Warm-up successful: {url}")
            else:
                logger.warning(f"Warm-up got status {resp.status_code}: {url}")

            # Add delay between requests
            if with_delay and i < last_index:
                time.sleep(0.5)
        except Exception as e:
            logger.warning(f"Warm-up failed for {url}: {e}")

    return success


def extended_warm_up(
    session: Any,
    base_url: str,
    timeout_seconds: float = 15.0,
) -> None:
    """Extended warm-up with delays and multiple page visits.

    More aggressive than basic warm-up to help with Cloudflare challenges:
    visits the site root, ``/about``, ``/categories`` and ``/top``, sleeping
    0.5 seconds after each completed request except the last. Request errors
    are logged and do not abort the remaining requests.

    Args:
        session: The session to warm up
        base_url: Base URL of the site
        timeout_seconds: Request timeout
    """
    base_url = base_url.rstrip("/")
    warmup_urls = [
        f"{base_url}/",
        f"{base_url}/about",
        f"{base_url}/categories",
        f"{base_url}/top",
    ]
    last_index = len(warmup_urls) - 1

    for i, url in enumerate(warmup_urls):
        try:
            resp = session.get(
                url,
                timeout=timeout_seconds,
                allow_redirects=True,
            )
            if resp.status_code == 200:
                logger.debug(f"Warm-up successful for {url}")
            else:
                logger.warning(f"Warm-up got status {resp.status_code} for {url}")

            # Add delay between requests to avoid rate limiting
            if i < last_index:
                time.sleep(0.5)
        except Exception as e:
            logger.warning(f"Warm-up failed for {url}: {e}")


def is_cloudflare_challenge(response: Any) -> bool:
    """Check if a response is a Cloudflare challenge page.

    Only 200 responses with an HTML content type are considered; the first
    1000 characters of the body are searched case-insensitively for
    "cloudflare" or "challenge". Error status codes are covered separately by
    ``is_cloudflare_error``.

    Args:
        response: The response to check

    Returns:
        True if this looks like a Cloudflare challenge
    """
    if response.status_code != 200:
        return False

    content_type = response.headers.get("Content-Type", "")
    if "text/html" not in content_type:
        return False

    content_lower = response.text[:1000].lower()
    return "cloudflare" in content_lower or "challenge" in content_lower


def is_cloudflare_error(status_code: int) -> bool:
    """Check if a status code is a Cloudflare-related error.

    Args:
        status_code: HTTP status code

    Returns:
        True if the code is listed in ``CLOUDFLARE_RETRY_CODES``
    """
    return status_code in CLOUDFLARE_RETRY_CODES

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/hmumixaM/uscardforum-mcp4'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.