talknerdytome-labs

Facebook Ads Library MCP Server

scrapecreators_service.py (15.9 kB)
import requests
import sys
import os
import logging
from datetime import datetime
from typing import Dict, Any, List, Optional

# Set up logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

SEARCH_API_URL = "https://api.scrapecreators.com/v1/facebook/adLibrary/search/companies"
ADS_API_URL = "https://api.scrapecreators.com/v1/facebook/adLibrary/company/ads"

SCRAPECREATORS_API_KEY = None


# --- Custom Exceptions ---

class CreditExhaustedException(Exception):
    """Raised when ScrapeCreators API credits are exhausted."""

    def __init__(self, message: str, credits_remaining: int = 0,
                 topup_url: str = "https://scrapecreators.com/dashboard"):
        self.credits_remaining = credits_remaining
        self.topup_url = topup_url
        super().__init__(message)


class RateLimitException(Exception):
    """Raised when ScrapeCreators API rate limit is exceeded."""

    def __init__(self, message: str, retry_after: Optional[int] = None):
        self.retry_after = retry_after
        super().__init__(message)


# --- Helper Functions ---

def check_credit_status(response: requests.Response) -> Optional[Dict[str, Any]]:
    """
    Check response for credit-related information and errors.

    Args:
        response: HTTP response from ScrapeCreators API

    Returns:
        Dictionary with credit info if available, None otherwise

    Raises:
        CreditExhaustedException: If credits are exhausted
        RateLimitException: If rate limit is exceeded
    """
    # Check for credit exhaustion status codes
    if response.status_code == 402:  # Payment Required
        raise CreditExhaustedException(
            "ScrapeCreators API credits exhausted. Please top up your account to continue.",
            credits_remaining=0
        )
    elif response.status_code == 429:  # Too Many Requests
        retry_after = response.headers.get('retry-after')
        raise RateLimitException(
            "ScrapeCreators API rate limit exceeded. Please wait before making more requests.",
            retry_after=int(retry_after) if retry_after else None
        )
    elif response.status_code == 403:  # Forbidden - could indicate credit issues
        # Check if it's credit-related
        try:
            error_data = response.json()
            if 'credit' in str(error_data).lower() or 'quota' in str(error_data).lower():
                raise CreditExhaustedException(
                    "ScrapeCreators API access denied. This may indicate insufficient credits.",
                    credits_remaining=0
                )
        except ValueError:
            # Body was not valid JSON; treat as a non-credit 403. (A bare
            # `except` here would also swallow the CreditExhaustedException
            # raised above, so we only catch JSON decoding errors.)
            pass

    # Extract credit information from headers if available
    credit_info = {}
    headers = response.headers

    # Common header names for credit information
    for header_name in ['x-credits-remaining', 'x-credit-remaining', 'credits-remaining']:
        if header_name in headers:
            try:
                credit_info['credits_remaining'] = int(headers[header_name])
            except ValueError:
                pass

    for header_name in ['x-credit-cost', 'credit-cost', 'x-credits-used']:
        if header_name in headers:
            try:
                credit_info['credit_cost'] = int(headers[header_name])
            except ValueError:
                pass

    return credit_info if credit_info else None


def get_scrapecreators_api_key() -> str:
    """
    Get ScrapeCreators API key from command line arguments or environment variable.
    Caches the key in memory after first read.

    Priority: command line argument > environment variable

    Returns:
        str: The ScrapeCreators API key.

    Raises:
        Exception: If no key is provided in command line arguments or environment.
    """
    global SCRAPECREATORS_API_KEY
    if SCRAPECREATORS_API_KEY is None:
        # Try command line argument first
        if "--scrapecreators-api-key" in sys.argv:
            token_index = sys.argv.index("--scrapecreators-api-key") + 1
            if token_index < len(sys.argv):
                SCRAPECREATORS_API_KEY = sys.argv[token_index]
                logger.info("Using ScrapeCreators API key from command line arguments")
            else:
                raise Exception("--scrapecreators-api-key argument provided but no key value followed it")
        # Try environment variable
        elif os.getenv("SCRAPECREATORS_API_KEY"):
            SCRAPECREATORS_API_KEY = os.getenv("SCRAPECREATORS_API_KEY")
            logger.info("Using ScrapeCreators API key from environment variable")
        else:
            raise Exception(
                "ScrapeCreators API key must be provided via '--scrapecreators-api-key' "
                "command line argument or 'SCRAPECREATORS_API_KEY' environment variable"
            )
    return SCRAPECREATORS_API_KEY


def get_platform_id(brand_name: str) -> Dict[str, str]:
    """
    Get Meta Platform IDs for a given brand name.

    Args:
        brand_name: The name of the company or brand to search for.

    Returns:
        Dictionary mapping matching page names to their Meta Platform IDs.

    Raises:
        CreditExhaustedException: If API credits are exhausted.
        RateLimitException: If rate limit is exceeded.
        requests.RequestException: If the API request fails.
    """
    api_key = get_scrapecreators_api_key()
    response = requests.get(
        SEARCH_API_URL,
        headers={"x-api-key": api_key},
        params={"query": brand_name},
        timeout=30  # Add timeout for better error handling
    )

    # Check for credit-related issues before raising for status
    check_credit_status(response)
    response.raise_for_status()

    content = response.json()
    logger.info(f"Search response for '{brand_name}': {len(content.get('searchResults', []))} results found")

    options = {}
    for result in content.get("searchResults", []):
        name = result.get("name")
        page_id = result.get("page_id")
        if name and page_id:
            options[name] = page_id

    return options


def get_ads(
    page_id: str,
    limit: int = 50,
    country: Optional[str] = None,
    trim: bool = True
) -> List[Dict[str, Any]]:
    """
    Get ads for a specific page ID with pagination support.

    Args:
        page_id: The Meta Platform ID for the brand.
        limit: Maximum number of ads to retrieve.
        country: Optional country code to filter ads (e.g., "US", "CA").
        trim: Whether to trim the response to essential fields only.

    Returns:
        List of ad objects with details.

    Raises:
        CreditExhaustedException: If API credits are exhausted.
        RateLimitException: If rate limit is exceeded.
        requests.RequestException: If the API request fails.
    """
    api_key = get_scrapecreators_api_key()
    cursor = None

    headers = {"x-api-key": api_key}
    params = {
        "pageId": page_id,
        "limit": min(limit, 100)  # Ensure we don't exceed API limits
    }

    # Add optional parameters if provided
    if country:
        params["country"] = country.upper()
    if trim:
        params["trim"] = "true"

    ads = []
    total_requests = 0
    max_requests = 10  # Allow more requests for comprehensive data

    while len(ads) < limit and total_requests < max_requests:
        if cursor:
            params['cursor'] = cursor

        try:
            response = requests.get(
                ADS_API_URL,
                headers=headers,
                params=params,
                timeout=30
            )
            total_requests += 1

            # Check for credit-related issues
            check_credit_status(response)

            if response.status_code != 200:
                logger.error(f"Error getting FB ads for page {page_id}: {response.status_code} {response.text}")
                break

            resJson = response.json()
            logger.info(f"Retrieved {len(resJson.get('results', []))} ads from API (request {total_requests})")

            res_ads = parse_fb_ads(resJson, trim)
            if len(res_ads) == 0:
                logger.info("No more ads found, stopping pagination")
                break

            ads.extend(res_ads)

            # Get cursor for next page
            cursor = resJson.get('cursor')
            if not cursor:
                logger.info("No cursor found, reached end of results")
                break

        except (CreditExhaustedException, RateLimitException):
            # Re-raise credit/rate limit exceptions to be handled by the caller.
            # This clause must precede the generic handlers below, which would
            # otherwise swallow them and silently break pagination.
            raise
        except requests.RequestException as e:
            logger.error(f"Network error while fetching ads: {str(e)}")
            break
        except Exception as e:
            logger.error(f"Error processing ads response: {str(e)}")
            break

    # Trim to requested limit
    return ads[:limit]


def get_platform_ids_batch(brand_names: List[str]) -> Dict[str, Dict[str, str]]:
    """
    Get Meta Platform IDs for multiple brand names with deduplication.

    Args:
        brand_names: List of company or brand names to search for.

    Returns:
        Dictionary mapping brand names to their platform ID results.
        Format: {brand_name: {platform_name: platform_id, ...}, ...}

    Raises:
        CreditExhaustedException: If API credits are exhausted
        RateLimitException: If rate limit is exceeded
        requests.RequestException: If API requests fail
    """
    # Deduplicate brand names while preserving order
    unique_brands = list(dict.fromkeys(brand_names))
    results = {}

    logger.info(f"Batch processing {len(unique_brands)} unique brands from {len(brand_names)} requested")

    for brand_name in unique_brands:
        try:
            platform_ids = get_platform_id(brand_name)
            results[brand_name] = platform_ids
            logger.info(f"Successfully retrieved platform IDs for '{brand_name}': {len(platform_ids)} found")
        except (CreditExhaustedException, RateLimitException):
            # Re-raise credit/rate limit exceptions immediately
            raise
        except Exception as e:
            logger.error(f"Failed to get platform IDs for '{brand_name}': {str(e)}")
            results[brand_name] = {}

    return results


def get_ads_batch(
    platform_ids: List[str],
    limit: int = 50,
    country: Optional[str] = None,
    trim: bool = True
) -> Dict[str, List[Dict[str, Any]]]:
    """
    Get ads for multiple platform IDs with deduplication.

    Args:
        platform_ids: List of Meta Platform IDs.
        limit: Maximum number of ads to retrieve per platform ID.
        country: Optional country code to filter ads.
        trim: Whether to trim the response to essential fields only.

    Returns:
        Dictionary mapping platform IDs to their ad results.
        Format: {platform_id: [ad_objects...], ...}

    Raises:
        CreditExhaustedException: If API credits are exhausted
        RateLimitException: If rate limit is exceeded
        requests.RequestException: If API requests fail
    """
    # Deduplicate platform IDs while preserving order
    unique_platform_ids = list(dict.fromkeys(platform_ids))
    results = {}

    logger.info(f"Batch processing {len(unique_platform_ids)} unique platform IDs from {len(platform_ids)} requested")

    for platform_id in unique_platform_ids:
        try:
            ads = get_ads(platform_id, limit, country, trim)
            results[platform_id] = ads
            logger.info(f"Successfully retrieved {len(ads)} ads for platform ID '{platform_id}'")
        except (CreditExhaustedException, RateLimitException):
            # Re-raise credit/rate limit exceptions immediately
            raise
        except Exception as e:
            logger.error(f"Failed to get ads for platform ID '{platform_id}': {str(e)}")
            results[platform_id] = []

    return results


def parse_fb_ads(resJson: Dict[str, Any], trim: bool = True) -> List[Dict[str, Any]]:
    """
    Parse Facebook ads from API response.

    Args:
        resJson: The JSON response from the ScrapeCreators API.
        trim: Whether to include only essential fields.

    Returns:
        List of parsed ad objects.
    """
    ads = []
    results = resJson.get('results', [])
    logger.info(f"Parsing {len(results)} FB ads")

    for ad in results:
        try:
            ad_id = ad.get('ad_archive_id')
            if not ad_id:
                continue

            # Parse dates (the API returns Unix timestamps)
            start_date = ad.get('start_date')
            end_date = ad.get('end_date')
            if start_date is not None:
                start_date = datetime.fromtimestamp(start_date).isoformat()
            if end_date is not None:
                end_date = datetime.fromtimestamp(end_date).isoformat()

            # Parse snapshot data
            snapshot = ad.get('snapshot', {})
            media_type = snapshot.get('display_format')

            # Skip unsupported media types
            if media_type not in {'IMAGE', 'VIDEO', 'DCO'}:
                continue

            # Parse body text
            body = snapshot.get('body', {})
            if body:
                bodies = [body.get('text')]
            else:
                bodies = []

            # Parse media URLs based on type
            media_urls = []
            if media_type == 'IMAGE':
                images = snapshot.get('images', [])
                if len(images) > 0:
                    media_urls = [images[0].get('resized_image_url')]
            elif media_type == 'VIDEO':
                videos = snapshot.get('videos', [])
                if len(videos) > 0:
                    media_urls = [videos[0].get('video_sd_url')]
            elif media_type == 'DCO':
                cards = snapshot.get('cards', [])
                if len(cards) > 0:
                    media_urls = [card.get('resized_image_url') for card in cards]
                    bodies = [card.get('body') for card in cards]

            # Skip if no media or body content
            if len(media_urls) == 0 or len(bodies) == 0:
                continue

            # Create ad objects
            for media_url, body_text in zip(media_urls, bodies):
                if media_url is not None and body_text:
                    ad_obj = {
                        'ad_id': ad_id,
                        'start_date': start_date,
                        'end_date': end_date,
                        'media_url': media_url,
                        'body': body_text,
                        'media_type': media_type
                    }

                    # Add additional fields if not trimming
                    if not trim:
                        ad_obj.update({
                            'page_id': ad.get('page_id'),
                            'page_name': ad.get('page_name'),
                            'currency': ad.get('currency'),
                            'funding_entity': ad.get('funding_entity'),
                            'impressions': ad.get('impressions'),
                            'spend': ad.get('spend'),
                            'disclaimer': ad.get('disclaimer'),
                            'languages': ad.get('languages'),
                            'publisher_platforms': ad.get('publisher_platforms'),
                            'platform_positions': ad.get('platform_positions'),
                            'effective_status': ad.get('effective_status')
                        })

                    ads.append(ad_obj)

        except Exception as e:
            logger.error(f"Error parsing ad {ad.get('ad_archive_id', 'unknown')}: {str(e)}")
            continue

    return ads
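
For context, here is a minimal sketch of how a caller might drive this module end to end: resolve a brand name to candidate pages, fetch a handful of ads for the first match, and handle the credit and rate-limit exceptions that the service deliberately re-raises. It assumes the file above is importable as scrapecreators_service and that SCRAPECREATORS_API_KEY is set in the environment; the brand name "Nike" and the retry fallback of 60 seconds are illustrative choices, not part of the server.

# Hypothetical usage sketch (not part of the server code).
import logging

import scrapecreators_service as sc

logging.basicConfig(level=logging.INFO)

try:
    # Resolve the brand name to candidate Facebook pages,
    # then fetch ads for the first match.
    pages = sc.get_platform_id("Nike")
    if pages:
        page_name, page_id = next(iter(pages.items()))
        ads = sc.get_ads(page_id, limit=10, country="US")
        print(f"{page_name}: {len(ads)} ads")
        for ad in ads[:3]:
            print(ad["ad_id"], ad["media_type"], ad["media_url"])
except sc.CreditExhaustedException as e:
    print(f"Out of API credits; top up at {e.topup_url}")
except sc.RateLimitException as e:
    wait = e.retry_after or 60  # fall back to 60s if no retry-after header
    print(f"Rate limited; retry after {wait}s")

Note the asymmetry in the module's error handling: per-item failures inside the batch helpers are logged and recorded as empty results, while credit exhaustion and rate limiting propagate to the caller, since continuing the batch would only burn further requests.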
