We provide all the information about MCP servers via our MCP API.
curl -X GET 'https://glama.ai/api/mcp/v1/servers/GodisinHisHeaven/uscardforum-mcp'
If you have feedback or need assistance with the MCP directory API, please join our Discord server.
"""Cloudflare bypass utilities.
Provides functions and constants for bypassing Cloudflare protection
using multiple strategies in order:
1. Playwright with stealth (real browser - most effective)
2. curl_cffi (real browser TLS fingerprints)
3. cloudscraper (multiple browser profiles)
"""
from __future__ import annotations

import logging
import subprocess
import sys
import time
from typing import Any

import cloudscraper
import requests
# Module-level logger, named after this module per stdlib convention.
logger = logging.getLogger(__name__)
# Track if we've already tried to install Playwright browsers
# (process-wide flag so installation is attempted at most once per process).
_playwright_browsers_installed: bool = False
# Cloudflare-related status codes that might be worth retrying
# (403/429 = blocked/rate-limited; 5xx = Cloudflare origin/edge errors).
CLOUDFLARE_RETRY_CODES: set[int] = {403, 429, 503, 520, 521, 522, 523, 524}
# Browser profiles for cloudscraper, tried in order until one succeeds.
BROWSER_PROFILES: list[dict[str, Any]] = [
{"browser": "chrome", "platform": "linux", "desktop": True},
{"browser": "chrome", "platform": "windows", "desktop": True},
{"browser": "chrome", "platform": "darwin", "desktop": True},
{"browser": "firefox", "platform": "linux", "desktop": True},
{"browser": "firefox", "platform": "windows", "desktop": True},
]
# curl_cffi browser impersonation profiles (TLS fingerprints), newest first.
CURL_CFFI_IMPERSONATES: list[str] = [
"chrome120",
"chrome119",
"chrome110",
"chrome107",
"chrome104",
"edge101",
"safari15_5",
]
# Common headers that make requests look more like a real browser
# (merged into every session created by this module).
BROWSER_HEADERS: dict[str, str] = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9",
"Accept-Encoding": "gzip, deflate, br",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
"Cache-Control": "max-age=0",
}
def _ensure_playwright_browsers() -> bool:
    """Ensure Playwright browsers are installed.

    Attempts to install Chromium if not already installed.
    Only tries once per process to avoid repeated installation attempts.

    Returns:
        True if browsers are available, False otherwise
    """
    global _playwright_browsers_installed
    if _playwright_browsers_installed:
        return True

    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        logger.warning("Playwright package not installed")
        return False

    # Try to launch browser to check if it's installed
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            browser.close()
        _playwright_browsers_installed = True
        logger.info("Playwright browsers already installed")
        return True
    except Exception as e:
        logger.info(f"Playwright browsers not installed, attempting installation: {e}")

    # Try to install browsers
    try:
        logger.info("Installing Playwright Chromium browser...")
        # BUGFIX: invoke via the current interpreter instead of relying on the
        # `playwright` console script being on PATH (it often isn't inside
        # virtualenvs, containers, or when installed with --user).
        result = subprocess.run(
            [sys.executable, "-m", "playwright", "install", "chromium"],
            capture_output=True,
            text=True,
            timeout=300,  # 5 minute timeout
        )
        if result.returncode == 0:
            logger.info("Playwright Chromium installed successfully")
            _playwright_browsers_installed = True
            return True
        else:
            logger.warning(f"Playwright install failed: {result.stderr}")
            return False
    except subprocess.TimeoutExpired:
        logger.warning("Playwright installation timed out")
        return False
    except FileNotFoundError:
        logger.warning("playwright command not found")
        return False
    except Exception as e:
        logger.warning(f"Failed to install Playwright browsers: {e}")
        return False
def create_cloudflare_session(
    delay: int = 3,
    browser: str = "chrome",
    platform: str = "linux",
) -> requests.Session:
    """Build a cloudscraper session preconfigured for Cloudflare-protected sites.

    Args:
        delay: Delay in seconds for challenge solving (default: 3)
        browser: Browser to emulate (chrome, firefox)
        platform: Platform to emulate (linux, windows, darwin)

    Returns:
        A cloudscraper session ready for Cloudflare-protected sites
    """
    profile = {"browser": browser, "platform": platform, "desktop": True}
    scraper = cloudscraper.create_scraper(browser=profile, delay=delay)
    # Layer realistic browser headers on top of cloudscraper's defaults.
    scraper.headers.update(BROWSER_HEADERS)
    return scraper
class CurlCffiSessionWrapper:
    """Wrapper to make curl_cffi.Session behave like requests.Session.

    This allows curl_cffi to be used as a drop-in replacement.
    """

    def __init__(self, session: Any = None, impersonate: str = "chrome120"):
        """Initialize wrapper with existing session or create new one.

        Args:
            session: Existing curl_cffi Session to wrap (reuses cookies)
            impersonate: Browser to impersonate if creating new session
        """
        if session is not None:
            self._session = session
        else:
            # Imported lazily so this module loads even without curl_cffi.
            from curl_cffi.requests import Session

            self._session = Session(impersonate=impersonate)
        self._impersonate = impersonate
        # Default headers applied to every request; per-call headers win.
        self.headers = dict(BROWSER_HEADERS)
        # Expose the underlying cookie jar so callers can read/transfer cookies.
        self.cookies = self._session.cookies

    def _prepare_kwargs(self, kwargs: dict) -> dict:
        """Default the timeout and merge default headers into *kwargs*.

        Previously this merge logic was duplicated verbatim in get/post/request;
        consolidating it keeps the three entry points in sync. Also tolerates
        an explicit ``headers=None`` (the old code crashed on it).
        """
        kwargs.setdefault("timeout", 15)
        headers = dict(self.headers)
        headers.update(kwargs.get("headers") or {})
        kwargs["headers"] = headers
        return kwargs

    def get(self, url: str, **kwargs) -> Any:
        """Make GET request."""
        return self._session.get(url, **self._prepare_kwargs(kwargs))

    def post(self, url: str, **kwargs) -> Any:
        """Make POST request."""
        return self._session.post(url, **self._prepare_kwargs(kwargs))

    def request(self, method: str, url: str, **kwargs) -> Any:
        """Make any HTTP request."""
        return self._session.request(method, url, **self._prepare_kwargs(kwargs))
def _create_session_with_curl_cffi(
    base_url: str,
    timeout_seconds: float = 15.0,
) -> Any | None:
    """Create a session using curl_cffi with browser TLS fingerprints.

    curl_cffi impersonates real browser TLS fingerprints, which is very
    effective against Cloudflare's TLS fingerprinting.

    Args:
        base_url: The base URL to test against
        timeout_seconds: Timeout for test requests

    Returns:
        A curl_cffi session wrapper (reusing the session that passed), or None if failed
    """
    try:
        from curl_cffi.requests import Session
    except ImportError:
        logger.warning("curl_cffi not available, skipping TLS fingerprint bypass")
        return None

    base_url = base_url.rstrip("/")
    for impersonate in CURL_CFFI_IMPERSONATES:
        try:
            session = Session(impersonate=impersonate)
            # Test if this impersonation works
            test_resp = session.get(
                f"{base_url}/",
                timeout=timeout_seconds,
                allow_redirects=True,
            )
            if test_resp.status_code == 200:
                logger.info(f"Cloudflare bypass successful with curl_cffi impersonate={impersonate}")
                # IMPORTANT: Return wrapper with the SAME session that passed (keeps cookies!)
                return CurlCffiSessionWrapper(session=session, impersonate=impersonate)
            # BUGFIX: previously only 403 was logged; other failing statuses
            # (429, 5xx) were skipped silently, hiding why a profile failed.
            logger.warning(f"curl_cffi {impersonate} got {test_resp.status_code}, trying next...")
        except Exception as e:
            logger.warning(f"curl_cffi {impersonate} failed: {e}, trying next...")

    logger.warning("All curl_cffi impersonations failed")
    return None
def _create_session_with_playwright(
    base_url: str,
    timeout_seconds: float = 30.0,
    headless: bool = True,
) -> requests.Session | None:
    """Create a requests session using cookies obtained from Playwright with stealth.

    Uses a real browser to solve Cloudflare challenges and transfers cookies
    to a requests session. Will attempt to install browsers if not present.

    Args:
        base_url: The base URL to navigate to
        timeout_seconds: Timeout for page load
        headless: Run browser in headless mode (default: True)

    Returns:
        A requests.Session with Cloudflare cookies, or None if failed
    """
    # Ensure browsers are installed (this also verifies imports work)
    if not _ensure_playwright_browsers():
        logger.warning("Playwright browsers not available")
        return None
    base_url = base_url.rstrip("/")
    # The cookies and User-Agent harvested from the browser are transferred
    # onto this plain requests session before it is returned.
    session = requests.Session()
    session.headers.update(BROWSER_HEADERS)
    try:
        # Import here after ensuring browsers are installed
        from playwright.sync_api import sync_playwright
        from playwright_stealth import Stealth
        stealth = Stealth()
        with sync_playwright() as p:
            # Launch browser with stealth settings: flags that hide common
            # automation signals (webdriver flag, infobars, sandboxing quirks).
            browser = p.chromium.launch(
                headless=headless,
                args=[
                    "--disable-blink-features=AutomationControlled",
                    "--disable-dev-shm-usage",
                    "--no-sandbox",
                    "--disable-setuid-sandbox",
                    "--disable-infobars",
                    "--window-size=1920,1080",
                    "--start-maximized",
                ],
            )
            # Create context with realistic viewport and user agent
            context = browser.new_context(
                viewport={"width": 1920, "height": 1080},
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                locale="en-US",
                timezone_id="America/New_York",
            )
            page = context.new_page()
            # Apply stealth to avoid detection (must happen before navigation)
            stealth.apply_stealth_sync(page)
            # Navigate to the site and wait for Cloudflare challenge to complete
            logger.info(f"Playwright: Navigating to {base_url}")
            # Playwright timeouts are in milliseconds, hence the * 1000.
            page.goto(base_url, wait_until="networkidle", timeout=timeout_seconds * 1000)
            # Wait additional time for any JS challenges
            page.wait_for_timeout(3000)
            # Check if we got past Cloudflare (heuristic keyword sniff on the
            # rendered HTML; may occasionally match unrelated page content)
            content = page.content().lower()
            if "cloudflare" in content and ("challenge" in content or "checking" in content):
                # Wait longer for challenge to complete
                logger.info("Playwright: Waiting for Cloudflare challenge...")
                page.wait_for_timeout(5000)
            # Get cookies from browser
            cookies = context.cookies()
            logger.info(f"Playwright: Got {len(cookies)} cookies")
            # Transfer cookies to requests session
            for cookie in cookies:
                session.cookies.set(
                    cookie["name"],
                    cookie["value"],
                    domain=cookie.get("domain", ""),
                    path=cookie.get("path", "/"),
                )
            # Get the user agent used — it must match the one the cookies were
            # issued under, or Cloudflare may invalidate them.
            user_agent = page.evaluate("navigator.userAgent")
            session.headers["User-Agent"] = user_agent
            browser.close()
        # Test if session works
        test_resp = session.get(
            f"{base_url}/",
            timeout=timeout_seconds,
            allow_redirects=True,
        )
        if test_resp.status_code == 200:
            logger.info("Playwright: Session created successfully")
            return session
        else:
            logger.warning(f"Playwright: Test request got status {test_resp.status_code}")
            return None
    except ImportError as e:
        logger.warning(f"Playwright import failed: {e}")
        return None
    except Exception as e:
        logger.error(f"Playwright fallback failed: {e}")
        return None
def create_cloudflare_session_with_fallback(
    base_url: str,
    timeout_seconds: float = 15.0,
    use_playwright: bool = True,
    use_curl_cffi: bool = True,
    use_cloudscraper: bool = True,
) -> Any:
    """Create a session with Cloudflare bypass, trying multiple strategies.

    Order of attempts (Playwright first as most effective):
    1. Playwright with stealth (real browser - best success rate)
    2. curl_cffi with real browser TLS fingerprints
    3. cloudscraper with different browser profiles

    Args:
        base_url: The base URL to test against
        timeout_seconds: Timeout for test requests
        use_playwright: Try Playwright first (default: True)
        use_curl_cffi: Try curl_cffi (default: True)
        use_cloudscraper: Try cloudscraper (default: True)

    Returns:
        A session configured to bypass Cloudflare
    """
    base_url = base_url.rstrip("/")

    # Strategy 1: Try Playwright with stealth (most effective)
    if use_playwright:
        logger.info("Trying Playwright with stealth (most effective)...")
        playwright_session = _create_session_with_playwright(
            base_url,
            # BUGFIX: previously hard-coded to 30.0, silently ignoring the
            # caller's timeout. Keep 30s as a floor (browser startup and
            # challenge solving are slow) but honor larger caller values.
            timeout_seconds=max(timeout_seconds, 30.0),
            headless=True,
        )
        if playwright_session is not None:
            return playwright_session
        logger.warning("Playwright failed, trying next strategy...")

    # Strategy 2: Try curl_cffi with TLS fingerprinting
    if use_curl_cffi:
        logger.info("Trying curl_cffi with TLS fingerprints...")
        curl_session = _create_session_with_curl_cffi(base_url, timeout_seconds)
        if curl_session is not None:
            return curl_session
        logger.warning("curl_cffi failed, trying next strategy...")

    # Strategy 3: Try cloudscraper with different profiles
    if use_cloudscraper:
        logger.info("Trying cloudscraper profiles...")
        for profile in BROWSER_PROFILES:
            try:
                session = cloudscraper.create_scraper(
                    browser=profile,
                    delay=3,
                )
                session.headers.update(BROWSER_HEADERS)
                test_resp = session.get(
                    f"{base_url}/",
                    timeout=timeout_seconds,
                    allow_redirects=True,
                )
                if test_resp.status_code == 200:
                    logger.info(f"Cloudflare bypass successful with cloudscraper profile: {profile}")
                    return session
                # BUGFIX: log every failing status, not only 403, so other
                # Cloudflare codes (429, 5xx) are visible when debugging.
                logger.warning(f"Cloudscraper profile {profile} got {test_resp.status_code}, trying next...")
            except Exception as e:
                logger.warning(f"Cloudscraper profile {profile} failed: {e}, trying next...")

    # Ultimate fallback: return cloudscraper session even if it didn't work,
    # so callers always receive a usable session object.
    logger.warning("All bypass methods failed, using basic cloudscraper as fallback")
    session = cloudscraper.create_scraper(
        browser={"browser": "chrome", "platform": "linux", "desktop": True},
        delay=5,
    )
    session.headers.update(BROWSER_HEADERS)
    return session
def warm_up_session(
    session: Any,
    base_url: str,
    timeout_seconds: float = 15.0,
    with_delay: bool = True,
) -> bool:
    """Warm up a session to obtain Cloudflare cookies.

    Args:
        session: The session to warm up
        base_url: Base URL of the site
        timeout_seconds: Request timeout
        with_delay: Add delays between requests

    Returns:
        True if at least one warm-up request succeeded
    """
    root = base_url.rstrip("/")
    targets = [f"{root}/", f"{root}/about"]
    last = len(targets) - 1
    any_ok = False
    for idx, url in enumerate(targets):
        try:
            resp = session.get(
                url,
                timeout=timeout_seconds,
                allow_redirects=True,
            )
            if resp.status_code == 200:
                any_ok = True
                logger.debug(f"Warm-up successful: {url}")
            else:
                logger.warning(f"Warm-up got status {resp.status_code}: {url}")
            # Pause between requests so we look less like a bot.
            if with_delay and idx < last:
                time.sleep(0.5)
        except Exception as e:
            logger.warning(f"Warm-up failed for {url}: {e}")
    return any_ok
def extended_warm_up(
    session: Any,
    base_url: str,
    timeout_seconds: float = 15.0,
) -> None:
    """Extended warm-up with delays and multiple page visits.

    More aggressive than basic warm-up to help with Cloudflare challenges.

    Args:
        session: The session to warm up
        base_url: Base URL of the site
        timeout_seconds: Request timeout
    """
    root = base_url.rstrip("/")
    pages = ["/", "/about", "/categories", "/top"]
    total = len(pages)
    for position, suffix in enumerate(pages, start=1):
        url = f"{root}{suffix}"
        try:
            reply = session.get(
                url,
                timeout=timeout_seconds,
                allow_redirects=True,
            )
            if reply.status_code == 200:
                logger.debug(f"Warm-up successful for {url}")
            else:
                logger.warning(f"Warm-up got status {reply.status_code} for {url}")
            # Space out the visits to avoid tripping rate limits.
            if position < total:
                time.sleep(0.5)
        except Exception as exc:
            logger.warning(f"Warm-up failed for {url}: {exc}")
def is_cloudflare_challenge(response: Any) -> bool:
    """Heuristically detect a Cloudflare challenge page served with HTTP 200.

    Args:
        response: The response to check

    Returns:
        True if this looks like a Cloudflare challenge
    """
    # Only HTML bodies returned with a 200 are candidates.
    if response.status_code != 200:
        return False
    if "text/html" not in response.headers.get("Content-Type", ""):
        return False
    # Sniff only the first 1000 chars — challenge markers appear early.
    snippet = response.text[:1000].lower()
    return any(marker in snippet for marker in ("cloudflare", "challenge"))
def is_cloudflare_error(status_code: int) -> bool:
    """Report whether *status_code* is a Cloudflare-related error code.

    Args:
        status_code: HTTP status code

    Returns:
        True if this is a Cloudflare-related error code
    """
    # Membership test against the module-level retry set (403/429/5xx family).
    is_cf_code = status_code in CLOUDFLARE_RETRY_CODES
    return is_cf_code