Threat Intelligence MCP Server

threat-intel-mcp
src
world_intel_mcp

fetcher.py•8.78 KiB

"""Async HTTP fetcher with timeout, retry, rate limiting, and circuit breaker integration. All external HTTP calls in world-intel-mcp go through this module. Stale-data fallback: when an API call fails, the last-known-good cached response is returned (marked with _stale=True) so dashboards never go blank. """ import asyncio import logging import time from typing import Any import httpx from .cache import Cache from .circuit_breaker import CircuitBreaker logger = logging.getLogger("world-intel-mcp.fetcher") # Yahoo Finance requires serialized access (600ms gap) _yahoo_lock = asyncio.Lock() _yahoo_last_call: float = 0.0 _YAHOO_MIN_INTERVAL = 0.6 # seconds # Per-source rate limits (min seconds between calls). # Sources not listed here have no enforced limit. _SOURCE_RATE_LIMITS: dict[str, float] = { "yahoo-finance": 0.6, # unofficial — ~100 req/min safe "opensky": 6.0, # free tier: 10 req/min "coingecko": 2.0, # free tier: 30 calls/min "cloudflare-radar": 3.0, # 20 req/min "reddit": 1.5, # ~60 req/min (be conservative) "nasa-firms": 2.0, # API key: ~1000 req/day "adsblol": 5.0, # community API — be very polite "polymarket": 1.0, # be polite "faa": 1.0, # govt API "usgs": 1.0, # generous but be polite "acled": 2.0, # API key based "nga": 2.0, # govt API } _source_locks: dict[str, asyncio.Lock] = {} _source_last_call: dict[str, float] = {} class Fetcher: """Centralized HTTP fetcher with caching, retries, and circuit breaking.""" def __init__( self, cache: Cache, breaker: CircuitBreaker, default_timeout: float = 15.0, max_retries: int = 2, client: httpx.AsyncClient | None = None, ): self.cache = cache self.breaker = breaker self.default_timeout = default_timeout self.max_retries = max_retries self._client: httpx.AsyncClient | None = client async def _get_client(self) -> httpx.AsyncClient: if self._client is None or self._client.is_closed: self._client = httpx.AsyncClient( timeout=httpx.Timeout(self.default_timeout), follow_redirects=True, limits=httpx.Limits(max_connections=50, max_keepalive_connections=20), headers={"User-Agent": "PhoenixAGI-WorldIntel/0.1"}, proxy=None, # never inherit system SOCKS proxy ) return self._client async def close(self) -> None: if self._client and not self._client.is_closed: await self._client.aclose() self._client = None async def get_json( self, url: str, source: str, cache_key: str | None = None, cache_ttl: int = 300, headers: dict[str, str] | None = None, params: dict[str, Any] | None = None, timeout: float | None = None, yahoo_rate_limit: bool = False, ) -> dict | list | None: """Fetch JSON with caching, circuit breaking, and retries. Args: url: Target URL. source: Source name for circuit breaker tracking. cache_key: Cache key. If None, uses url+params hash. cache_ttl: Cache TTL in seconds. headers: Extra HTTP headers. params: Query parameters. timeout: Per-request timeout override. yahoo_rate_limit: If True, enforce Yahoo Finance 600ms serialization. Returns: Parsed JSON or None on failure. """ # Check cache (live) effective_key = cache_key or f"{source}:{url}:{params}" cached = self.cache.get(effective_key) if cached is not None: return cached # Check circuit breaker — fall back to stale data if open if not self.breaker.is_available(source): logger.debug("Circuit open for %s, trying stale cache", source) return self._stale_fallback(effective_key, source) # Per-source rate limiting if yahoo_rate_limit: await self._yahoo_throttle() await self._source_throttle(source) # Fetch with retries client = await self._get_client() last_error: Exception | None = None for attempt in range(self.max_retries + 1): try: resp = await client.get( url, headers=headers, params=params, timeout=timeout or self.default_timeout, ) resp.raise_for_status() data = resp.json() self.breaker.record_success(source) self.cache.set(effective_key, data, cache_ttl) return data except (httpx.HTTPStatusError, httpx.RequestError, Exception) as exc: last_error = exc if attempt < self.max_retries: wait = 1.0 * (attempt + 1) logger.debug("Retry %d/%d for %s (%s), waiting %.1fs", attempt + 1, self.max_retries, source, exc, wait) await asyncio.sleep(wait) # All retries failed — try stale cache before giving up self.breaker.record_failure(source) logger.warning("Fetch failed for %s: %s (url=%s)", source, last_error, url) return self._stale_fallback(effective_key, source) async def get_text( self, url: str, source: str, cache_key: str | None = None, cache_ttl: int = 300, headers: dict[str, str] | None = None, params: dict[str, Any] | None = None, timeout: float | None = None, ) -> str | None: """Fetch raw text with caching and circuit breaking.""" effective_key = cache_key or f"{source}:text:{url}:{params}" cached = self.cache.get(effective_key) if cached is not None: return cached if not self.breaker.is_available(source): return self._stale_fallback(effective_key, source) await self._source_throttle(source) client = await self._get_client() last_error: Exception | None = None for attempt in range(self.max_retries + 1): try: resp = await client.get( url, headers=headers, params=params, timeout=timeout or self.default_timeout, ) resp.raise_for_status() text = resp.text self.breaker.record_success(source) self.cache.set(effective_key, text, cache_ttl) return text except (httpx.HTTPStatusError, httpx.RequestError, Exception) as exc: last_error = exc if attempt < self.max_retries: await asyncio.sleep(1.0 * (attempt + 1)) self.breaker.record_failure(source) logger.warning("Text fetch failed for %s: %s", source, last_error) return self._stale_fallback(effective_key, source) async def get_xml( self, url: str, source: str, cache_key: str | None = None, cache_ttl: int = 300, timeout: float | None = None, ) -> str | None: """Fetch XML content (returns raw text for feedparser/ET parsing).""" return await self.get_text(url, source, cache_key, cache_ttl, timeout=timeout) def _stale_fallback(self, cache_key: str, source: str) -> Any | None: """Return stale (expired) cached data as last-known-good fallback.""" stale = self.cache.get_stale(cache_key) if stale is not None: logger.info("Serving stale cache for %s (key=%s)", source, cache_key) return stale async def _source_throttle(self, source: str) -> None: """Enforce per-source rate limit from _SOURCE_RATE_LIMITS.""" min_interval = _SOURCE_RATE_LIMITS.get(source) if min_interval is None: return if source not in _source_locks: _source_locks[source] = asyncio.Lock() async with _source_locks[source]: now = time.time() last = _source_last_call.get(source, 0.0) elapsed = now - last if elapsed < min_interval: await asyncio.sleep(min_interval - elapsed) _source_last_call[source] = time.time() async def _yahoo_throttle(self) -> None: """Enforce Yahoo Finance rate limit (600ms between calls).""" global _yahoo_last_call async with _yahoo_lock: now = time.time() elapsed = now - _yahoo_last_call if elapsed < _YAHOO_MIN_INTERVAL: await asyncio.sleep(_YAHOO_MIN_INTERVAL - elapsed) _yahoo_last_call = time.time()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/marc-shade/threat-intel-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

fetcher.py•8.78 KiB