QuantClaw Data

quantclaw-data
modules

cdp_carbon_disclosure.py•21.6 KiB

"""
CDP Carbon Disclosure Module

Tracks corporate carbon emissions and climate targets via CDP (formerly Carbon
Disclosure Project). CDP collects self-reported environmental data from
thousands of companies worldwide.

Since CDP data requires paid access, this module uses alternative free sources:
- SEC climate disclosures (proposed/final rules)
- Company sustainability reports (via web scraping)
- EPA GHG Reporting Program (US facilities)
- European Pollutant Release and Transfer Register (E-PRTR)

Data sources:
1. EPA FLIGHT (Facility Level Information on Greenhouse gases Tool) - US industrial emissions
2. E-PRTR - European industrial facility emissions
3. SEC EDGAR - Search for sustainability/climate disclosures in 10-K/20-F filings
4. Company IR pages - Parse sustainability PDFs for carbon data

NOTE: The EPA and E-PRTR fetchers, the ticker lookups and the SEC excerpt
content are placeholders that return illustrative values. Only the SEC
submissions lookup performs a real HTTP request.

Usage:
    from modules.cdp_carbon_disclosure import fetch_epa_ghg_data, search_sec_climate_disclosures

    # Get EPA facility emissions for a company
    epa_data = fetch_epa_ghg_data(ticker="TSLA", year=2023)

    # Search SEC filings for climate mentions
    sec_disclosures = search_sec_climate_disclosures(cik="0001318605", years=3)
"""

import json
import re
from datetime import datetime, timedelta
from typing import Dict, List, Optional

try:
    import requests
except ImportError:
    # Only the SEC lookup needs HTTP; keep the offline helpers importable.
    requests = None


# data.sec.gov serves the JSON APIs; filing documents live under www.sec.gov.
_SEC_DATA_URL = "https://data.sec.gov"
_SEC_ARCHIVES_URL = "https://www.sec.gov/Archives/edgar/data"
# NOTE(review): SEC fair-access guidance asks for a User-Agent that identifies
# the requester with contact details - confirm this value is acceptable.
_SEC_HEADERS = {"User-Agent": "Research/1.0"}
_HTTP_TIMEOUT_SECONDS = 30
_RELEVANT_SEC_FORMS = ("10-K", "20-F", "DEF 14A")

# "<number> [thousand|million|billion] metric tons"; the scale word is captured
# so the multiplier comes from the matched quantity, not from the whole text.
_METRIC_TONS_RE = re.compile(
    r'(\d+(?:,\d+)*(?:\.\d+)?)\s*(thousand|million|billion)?\s*metric tons',
    re.I,
)
_SCALE_FACTORS = {
    "thousand": 1_000,
    "million": 1_000_000,
    "billion": 1_000_000_000,
}

# Allows a few words between "net zero" and "by <year>", e.g.
# "net zero emissions by 2050" or "net-zero greenhouse gas emissions by 2045".
_NET_ZERO_RE = re.compile(
    r'net[\s-]zero(?:\s+[\w-]+){0,4}?\s+by\s+(\d{4})', re.I
)
_REDUCTION_TARGET_RE = re.compile(
    r'reduce.*emissions.*by (\d+)%.*by (\d{4})', re.I
)

# In production, use Yahoo Finance or SEC mappings.
_TICKER_TO_NAME = {
    "AAPL": "Apple Inc.",
    "MSFT": "Microsoft Corporation",
    "GOOGL": "Alphabet Inc.",
    "AMZN": "Amazon.com Inc.",
    "TSLA": "Tesla Inc.",
    "XOM": "Exxon Mobil Corporation",
    "CVX": "Chevron Corporation",
}

# In production, use SEC company tickers JSON.
_TICKER_TO_CIK = {
    "AAPL": "0000320193",
    "MSFT": "0000789019",
    "GOOGL": "0001652044",
    "AMZN": "0001018724",
    "TSLA": "0001318605",
    "XOM": "0000034088",
    "CVX": "0000093410",
}


def fetch_epa_ghg_data(ticker: Optional[str] = None,
                       company_name: Optional[str] = None,
                       year: int = 2023,
                       state: Optional[str] = None) -> List[Dict]:
    """
    Fetch EPA FLIGHT (Facility Level GHG) data for a company's facilities.

    EPA FLIGHT covers large US industrial facilities (>25K metric tons
    CO2e/year). Data includes Scope 1 emissions by gas type and source
    category.

    This is a placeholder: it returns a single mock facility built from the
    arguments instead of querying EPA. A real version would parse the FLIGHT
    Excel downloads (https://ghgdata.epa.gov/ghgp/main.do) or use an EPA API.

    Args:
        ticker: Stock ticker (used to look up the company name when
            ``company_name`` is not given)
        company_name: Company name to search for
        year: Reporting year (2010-2023)
        state: Optional state filter (e.g., "CA", "TX"); the mock record
            falls back to "TX" when omitted

    Returns:
        List of facility emissions records with location, industry, and gas
        breakdowns. Empty when neither a ticker nor a company name is given,
        or when an error occurs (the error is printed).

    Example:
        >>> data = fetch_epa_ghg_data(ticker="XOM", year=2022)
        >>> for facility in data:
        ...     print(f"{facility['facility_name']}: {facility['total_co2e_mt']:,} MT CO2e")
    """
    try:
        if ticker and not company_name:
            # Lookup company name from ticker (simplified)
            company_name = _ticker_to_company_name(ticker)

        if not company_name:
            return []

        # Mock data structure (real version would query the EPA database)
        mock_facility = {
            "facility_id": f"EPA_{year}_001",
            "facility_name": f"{company_name} Manufacturing Plant",
            "company_name": company_name,
            "ticker": ticker,
            "year": year,
            "state": state or "TX",
            "city": "Houston",
            "latitude": 29.7604,
            "longitude": -95.3698,
            "industry_type": "Petroleum and Coal Products Manufacturing",
            "naics_code": "324110",
            "total_co2e_mt": 2_500_000,  # metric tons CO2 equivalent
            "emissions_breakdown": {
                "co2": 2_300_000,
                "ch4": 150_000,  # Methane (in CO2e)
                "n2o": 50_000,  # Nitrous oxide (in CO2e)
            },
            "source_categories": {
                "stationary_combustion": 1_800_000,
                "process_emissions": 500_000,
                "fugitive_emissions": 200_000,
            },
            "data_source": "EPA FLIGHT",
            "last_updated": datetime.now().isoformat(),
        }
        return [mock_facility]

    except Exception as e:
        # Best-effort contract: report the problem and return no data.
        print(f"EPA GHG data fetch error: {e}")
        return []


def fetch_eprtr_emissions(company_name: str, year: int = 2021,
                          country: Optional[str] = None) -> List[Dict]:
    """
    Fetch European Pollutant Release and Transfer Register (E-PRTR) data.

    E-PRTR covers 30,000+ European industrial facilities reporting 91
    pollutants including greenhouse gases (CO2, CH4, N2O, HFCs, PFCs, SF6).

    This is a placeholder: it returns one example record built from the
    arguments. A real implementation would query the E-PRTR JSON API
    (https://industry.eea.europa.eu/api/FacilityRegistry/v2/) with the
    reporting year, facility name and, optionally, the country code.

    Args:
        company_name: Company name to search
        year: Reporting year (2007-2021, triennial)
        country: ISO 2-letter country code (e.g., "DE", "FR", "PL"); the
            example record falls back to "DE" when omitted

    Returns:
        List of facility emissions records, or an empty list when an error
        occurs (the error is printed).
    """
    try:
        # Example response structure
        return [
            {
                "facility_id": "EU_REGISTRY_12345",
                "facility_name": f"{company_name} GmbH Plant",
                "company_name": company_name,
                "country": country or "DE",
                "city": "Berlin",
                "year": year,
                "latitude": 52.5200,
                "longitude": 13.4050,
                "main_activity": "Combustion of fuels",
                "nace_code": "35.11",  # Electric power generation
                "pollutants": {
                    "CO2": 1_200_000,  # tonnes/year
                    "CH4": 50,
                    "N2O": 10,
                    "HFCs": 0.5,
                    "PFCs": 0.1,
                    "SF6": 0.05,
                },
                "total_co2e_tonnes": 1_250_000,
                "data_source": "E-PRTR",
                "last_updated": datetime.now().isoformat(),
            }
        ]

    except Exception as e:
        print(f"E-PRTR data fetch error: {e}")
        return []


def _build_filing_index_url(cik: str, accession_number: str) -> str:
    """Return the www.sec.gov index-page URL for one EDGAR filing.

    Archive paths use the CIK without leading zeros and the accession number
    without dashes as the folder name.
    """
    cik_unpadded = cik.lstrip("0") or "0"
    accession_folder = accession_number.replace("-", "")
    return (
        f"{_SEC_ARCHIVES_URL}/{cik_unpadded}/{accession_folder}/"
        f"{accession_number}-index.htm"
    )


def search_sec_climate_disclosures(cik: str, years: int = 3,
                                   keywords: Optional[List[str]] = None) -> List[Dict]:
    """
    Search SEC EDGAR filings for climate-related disclosures.

    Looks up the company's recent 10-K, 20-F, and DEF 14A filings via the
    EDGAR submissions API. The intended search targets are mentions of:
    - Scope 1, 2, 3 emissions
    - Net zero commitments
    - Carbon pricing
    - Climate risk
    - TCFD disclosures

    The filing list is real, but the ``climate_mentions`` count and the
    ``excerpts`` of each record are mock values; the filing documents are not
    yet fetched or parsed, so ``keywords`` is currently not applied.

    Args:
        cik: Company CIK (Central Index Key), with or without leading zeros
        years: Number of years to search back
        keywords: Custom keywords to search (default: climate terms)

    Returns:
        List of disclosure records with filing metadata and an ``excerpts``
        list. Empty when the request fails or ``requests`` is unavailable
        (the error is printed).

    Example:
        >>> disclosures = search_sec_climate_disclosures(cik="0000789019", years=5)  # MSFT
        >>> for disclosure in disclosures:
        ...     for excerpt in disclosure['excerpts']:
        ...         print(f"{disclosure['filing_date']}: {excerpt['text'][:100]}...")
    """
    if keywords is None:
        keywords = [
            "scope 1 emissions", "scope 2 emissions", "scope 3 emissions",
            "greenhouse gas", "carbon neutral", "net zero",
            "climate risk", "TCFD", "carbon pricing",
            "emissions reduction target",
        ]

    if requests is None:
        print("SEC climate disclosure search error: "
              "the 'requests' package is not installed")
        return []

    try:
        # Get recent filings
        cik_padded = cik.zfill(10)
        submissions_url = f"{_SEC_DATA_URL}/submissions/CIK{cik_padded}.json"

        # A timeout keeps a stalled connection from hanging the caller.
        response = requests.get(
            submissions_url,
            headers=_SEC_HEADERS,
            timeout=_HTTP_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        data = response.json()

        recent = data.get("filings", {}).get("recent", {})
        cutoff_date = datetime.now() - timedelta(days=365 * years)

        disclosures = []
        # The API returns parallel arrays; walk them in lockstep.
        filing_rows = zip(
            recent.get("form", []),
            recent.get("accessionNumber", []),
            recent.get("filingDate", []),
        )
        for form, accession_number, filing_date_text in filing_rows:
            if form not in _RELEVANT_SEC_FORMS:
                continue

            filing_date = datetime.strptime(filing_date_text, "%Y-%m-%d")
            if filing_date < cutoff_date:
                continue

            # Mock climate disclosure (real version would fetch and parse HTML)
            disclosures.append({
                "cik": cik,
                "company_name": data.get("name", ""),
                "form_type": form,
                "filing_date": filing_date_text,
                "accession_number": accession_number,
                "filing_url": _build_filing_index_url(cik, accession_number),
                "climate_mentions": 12,  # Count of keyword matches
                "excerpts": [
                    {
                        "keyword": "scope 1 emissions",
                        "text": f"In fiscal year {filing_date.year}, our Scope 1 emissions totaled 1.2 million metric tons of CO2 equivalent...",
                        "section": "Business Overview"
                    },
                    {
                        "keyword": "net zero",
                        "text": "We have committed to achieving net zero emissions by 2050, with interim targets...",
                        "section": "Risk Factors"
                    }
                ],
                "data_source": "SEC EDGAR",
                "parsed_date": datetime.now().isoformat(),
            })

        return disclosures

    except Exception as e:
        # Best-effort contract: report the problem and return no data.
        print(f"SEC climate disclosure search error: {e}")
        return []


def _parse_reported_scope1(sec_disclosures: List[Dict]) -> float:
    """Return the largest Scope 1 figure (metric tons) quoted in the excerpts.

    Simplified extraction (a real version would use NLP): for each excerpt
    mentioning "scope 1", the first "<number> [scale] metric tons" quantity is
    read. Returns 0 when nothing is found.
    """
    reported = 0
    for disclosure in sec_disclosures:
        for excerpt in disclosure.get("excerpts", []):
            text = excerpt.get("text", "")
            if "scope 1" not in text.lower():
                continue
            match = _METRIC_TONS_RE.search(text)
            if not match:
                continue
            value = float(match.group(1).replace(',', ''))
            scale_word = match.group(2)
            if scale_word:
                value *= _SCALE_FACTORS[scale_word.lower()]
            reported = max(reported, value)
    return reported


def get_company_carbon_footprint(ticker: str, year: int = 2023,
                                 include_scope3: bool = False) -> Dict:
    """
    Aggregate carbon footprint data from multiple sources for a company.

    Combines:
    - EPA facility-level data (US Scope 1)
    - SEC filing disclosures (Scope 1; Scope 2 and 3 are not parsed yet and
      are therefore always reported as ``None``)

    EPA totals take precedence for Scope 1; the SEC-reported figure is used
    only when EPA yields nothing.

    Args:
        ticker: Stock ticker
        year: Reporting year
        include_scope3: Whether to include Scope 3 in the totals

    Returns:
        Aggregated carbon footprint with source attribution.
        ``scope1_sources`` is empty when no source produced a Scope 1 value.

    Scope definitions:
    - Scope 1: Direct emissions from owned/controlled sources
    - Scope 2: Indirect emissions from purchased electricity, heat, steam
    - Scope 3: All other indirect emissions (supply chain, product use, etc.)
    """
    company_name = _ticker_to_company_name(ticker)

    # Fetch EPA data (US facilities)
    epa_facilities = fetch_epa_ghg_data(ticker=ticker, year=year)

    # Fetch SEC disclosures
    cik = _ticker_to_cik(ticker)
    sec_disclosures = search_sec_climate_disclosures(cik=cik, years=2)

    # Calculate totals
    scope1_epa = sum(f.get("total_co2e_mt", 0) for f in epa_facilities)
    scope1_reported = _parse_reported_scope1(sec_disclosures)
    # Scope 2/3 extraction is not implemented yet; keep explicit zeros so the
    # aggregation below stays in one place.
    scope2_reported = 0
    scope3_reported = 0

    scope1_total = scope1_epa or scope1_reported
    if scope1_epa:
        scope1_sources = ["EPA FLIGHT"]
    elif scope1_reported:
        scope1_sources = ["SEC Filings"]
    else:
        # Do not attribute a zero to a source that supplied no figure.
        scope1_sources = []

    scope3_included = scope3_reported if include_scope3 else 0

    return {
        "ticker": ticker,
        "company_name": company_name,
        "reporting_year": year,
        "scope1_mt_co2e": scope1_total,
        "scope1_sources": scope1_sources,
        "scope2_mt_co2e": scope2_reported if scope2_reported > 0 else None,
        "scope3_mt_co2e": scope3_included if scope3_included > 0 else None,
        "total_mt_co2e": scope1_total + scope2_reported + scope3_included,
        "facility_count": len(epa_facilities),
        "facilities": [
            {
                "name": f.get("facility_name"),
                "location": f"{f.get('city')}, {f.get('state')}",
                "emissions_mt": f.get("total_co2e_mt"),
            }
            for f in epa_facilities
        ],
        "climate_targets": _extract_climate_targets(sec_disclosures),
        "last_updated": datetime.now().isoformat(),
    }


def _ticker_to_company_name(ticker: str) -> str:
    """Lookup company name from ticker (placeholder).

    Unknown tickers map to "<TICKER> Corporation".
    """
    symbol = ticker.upper()
    return _TICKER_TO_NAME.get(symbol, f"{symbol} Corporation")


def _ticker_to_cik(ticker: str) -> str:
    """Lookup CIK from ticker (placeholder).

    Unknown tickers map to the all-zero CIK "0000000000".
    """
    return _TICKER_TO_CIK.get(ticker.upper(), "0000000000")


def _extract_climate_targets(sec_disclosures: List[Dict]) -> List[Dict]:
    """Extract climate targets/commitments from SEC filings.

    Scans every excerpt's text for a net-zero commitment ("net zero ... by
    <year>") and for a percentage reduction target ("reduce ... emissions ...
    by <N>% ... by <year>"). Each hit becomes a dict tagged with the form type
    and filing date of the disclosure it came from.
    """
    targets = []

    for disclosure in sec_disclosures:
        source = disclosure.get("form_type")
        filing_date = disclosure.get("filing_date")

        for excerpt in disclosure.get("excerpts", []):
            text = excerpt.get("text", "")

            # Look for net zero commitments
            net_zero = _NET_ZERO_RE.search(text)
            if net_zero:
                targets.append({
                    "type": "net_zero",
                    "target_year": int(net_zero.group(1)),
                    "source": source,
                    "filing_date": filing_date,
                })

            # Look for reduction targets
            reduction = _REDUCTION_TARGET_RE.search(text)
            if reduction:
                targets.append({
                    "type": "reduction_target",
                    "reduction_pct": int(reduction.group(1)),
                    "target_year": int(reduction.group(2)),
                    "source": source,
                    "filing_date": filing_date,
                })

    return targets


def main() -> None:
    """Command-line entry point: parse arguments and print the chosen report."""
    import argparse

    parser = argparse.ArgumentParser(description="CDP Carbon Disclosure CLI")
    subparsers = parser.add_subparsers(dest='command', help='Commands')

    # cdp-epa command
    epa_parser = subparsers.add_parser('cdp-epa', help='EPA FLIGHT facility-level emissions')
    epa_parser.add_argument('ticker', help='Stock ticker')
    epa_parser.add_argument('--year', type=int, default=2023, help='Reporting year')
    epa_parser.add_argument('--state', help='Filter by state (e.g., TX, CA)')

    # cdp-eprtr command
    eprtr_parser = subparsers.add_parser('cdp-eprtr', help='E-PRTR European emissions')
    eprtr_parser.add_argument('company', help='Company name')
    eprtr_parser.add_argument('--year', type=int, default=2021, help='Reporting year')
    eprtr_parser.add_argument('--country', help='ISO 2-letter country code')

    # cdp-sec command
    sec_parser = subparsers.add_parser('cdp-sec', help='SEC climate disclosures')
    sec_parser.add_argument('cik', help='Company CIK')
    sec_parser.add_argument('--years', type=int, default=3, help='Years to search back')

    # cdp-footprint command
    footprint_parser = subparsers.add_parser('cdp-footprint', help='Aggregated carbon footprint')
    footprint_parser.add_argument('ticker', help='Stock ticker')
    footprint_parser.add_argument('--year', type=int, default=2023, help='Reporting year')
    footprint_parser.add_argument('--scope3', action='store_true', help='Include Scope 3 estimates')

    args = parser.parse_args()

    if args.command == 'cdp-epa':
        print(f"EPA GHG Data for {args.ticker} ({args.year}):")
        print("=" * 60)

        epa_data = fetch_epa_ghg_data(ticker=args.ticker, year=args.year, state=args.state)
        if not epa_data:
            print("No EPA FLIGHT data found for this company.")
            return

        total_emissions = sum(f.get('total_co2e_mt', 0) for f in epa_data)
        print(f"\nTotal Emissions: {total_emissions:,} MT CO2e")
        print(f"Facilities Reporting: {len(epa_data)}")
        print("\nFacility Breakdown:")
        for facility in epa_data:
            print(f"\n {facility['facility_name']}")
            print(f" Location: {facility['city']}, {facility['state']}")
            print(f" Industry: {facility['industry_type']}")
            print(f" Emissions: {facility['total_co2e_mt']:,} MT CO2e")
            print(" Breakdown:")
            for gas, amount in facility['emissions_breakdown'].items():
                print(f" {gas.upper()}: {amount:,} MT")

    elif args.command == 'cdp-eprtr':
        print(f"E-PRTR Emissions for {args.company} ({args.year}):")
        print("=" * 60)

        eprtr_data = fetch_eprtr_emissions(company_name=args.company, year=args.year, country=args.country)
        if not eprtr_data:
            print("No E-PRTR data found for this company.")
            return

        total_emissions = sum(f.get('total_co2e_tonnes', 0) for f in eprtr_data)
        print(f"\nTotal Emissions: {total_emissions:,} tonnes CO2e")
        print(f"Facilities Reporting: {len(eprtr_data)}")
        print("\nFacility Breakdown:")
        for facility in eprtr_data:
            print(f"\n {facility['facility_name']}")
            print(f" Country: {facility['country']}")
            print(f" Activity: {facility['main_activity']}")
            print(f" Emissions: {facility['total_co2e_tonnes']:,} tonnes CO2e")

    elif args.command == 'cdp-sec':
        print(f"SEC Climate Disclosures (CIK: {args.cik}):")
        print("=" * 60)

        sec_data = search_sec_climate_disclosures(cik=args.cik, years=args.years)
        if not sec_data:
            print("No SEC climate disclosures found.")
            return

        print(f"\nFound {len(sec_data)} relevant filings with climate mentions")
        for disclosure in sec_data:
            print(f"\n{disclosure['form_type']} - {disclosure['filing_date']}")
            print(f" Climate Mentions: {disclosure['climate_mentions']}")
            print(f" URL: {disclosure['filing_url']}")
            print(" Key Excerpts:")
            for excerpt in disclosure['excerpts'][:2]:
                print(f" - {excerpt['keyword']}: {excerpt['text'][:80]}...")

    elif args.command == 'cdp-footprint':
        print(f"Carbon Footprint for {args.ticker} ({args.year}):")
        print("=" * 60)

        footprint = get_company_carbon_footprint(ticker=args.ticker, year=args.year, include_scope3=args.scope3)

        print(f"\nCompany: {footprint['company_name']}")
        print(f"Reporting Year: {footprint['reporting_year']}")
        print("\nEmissions Summary:")
        print(f" Scope 1 (Direct): {footprint['scope1_mt_co2e']:,} MT CO2e")
        if footprint['scope2_mt_co2e']:
            print(f" Scope 2 (Indirect - Purchased): {footprint['scope2_mt_co2e']:,} MT CO2e")
        if footprint['scope3_mt_co2e']:
            print(f" Scope 3 (Supply Chain): {footprint['scope3_mt_co2e']:,} MT CO2e")
        print(f" TOTAL: {footprint['total_mt_co2e']:,} MT CO2e")

        print(f"\nData Sources: {', '.join(footprint['scope1_sources'])}")
        print(f"Facilities Tracked: {footprint['facility_count']}")

        if footprint['facilities']:
            print("\nTop Facilities:")
            for facility in footprint['facilities'][:5]:
                print(f" - {facility['name']} ({facility['location']}): {facility['emissions_mt']:,} MT")

        if footprint['climate_targets']:
            print("\nClimate Targets:")
            for target in footprint['climate_targets']:
                if target['type'] == 'net_zero':
                    print(f" - Net Zero by {target['target_year']}")
                elif target['type'] == 'reduction_target':
                    print(f" - {target['reduction_pct']}% reduction by {target['target_year']}")

    else:
        parser.print_help()


if __name__ == "__main__":
    main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yoniassia/quantclaw-data'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

cdp_carbon_disclosure.py•21.6 KiB