Skip to main content
Glama
secrets.py (10.9 kB)
"""Secrets detection - identify potential secrets in memory content. This module provides pattern-based detection of common secrets like API keys, tokens, passwords, and credentials. It's designed to warn users before they accidentally store sensitive data in memory. Security Objectives: 1. Detect API keys, tokens, and credentials before storage 2. Warn users about potential secret exposure 3. Prevent accidental logging of secrets 4. Configurable detection (can be disabled if needed) References: - CWE-798: Use of Hard-coded Credentials - CWE-312: Cleartext Storage of Sensitive Information - OWASP: https://owasp.org/www-community/vulnerabilities/Use_of_hard-coded_password """ import re from dataclasses import dataclass # Secret detection patterns # These patterns are designed to minimize false positives while catching real secrets PATTERNS = { # Generic API keys and tokens "api_key": re.compile( r"(?i)(api[_-]?key|apikey|api[_-]?token|access[_-]?key)[\s=:\"']+([a-zA-Z0-9_\-]{20,})", re.IGNORECASE, ), # AWS keys "aws_access_key": re.compile(r"AKIA[0-9A-Z]{16}"), "aws_secret_key": re.compile( r"(?i)aws[_-]?secret[_-]?access[_-]?key[\s=:\"']+([a-zA-Z0-9/+=]{40})" ), # GitHub tokens "github_token": re.compile(r"gh[pours]_[a-zA-Z0-9]{36,}"), "github_classic": re.compile(r"ghp_[a-zA-Z0-9]{36,}"), # OpenAI API keys "openai_key": re.compile(r"sk-[a-zA-Z0-9]{48,}"), # Anthropic API keys "anthropic_key": re.compile(r"sk-ant-[a-zA-Z0-9\-]{95,}"), # Google API keys "google_api_key": re.compile(r"AIza[0-9A-Za-z_\-]{35}"), # Slack tokens "slack_token": re.compile(r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24,}"), # Generic tokens "bearer_token": re.compile(r"(?i)bearer[\s]+([a-zA-Z0-9_\-\.]{20,})"), # JWT tokens "jwt_token": re.compile(r"eyJ[a-zA-Z0-9_\-]+\.eyJ[a-zA-Z0-9_\-]+\.[a-zA-Z0-9_\-]+"), # Private keys (PEM format) "private_key": re.compile( r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----", re.IGNORECASE, ), # Database connection strings "database_url": re.compile( 
r"(?i)(postgres|mysql|mongodb|redis)://[^\s@]+:[^\s@]+@[^\s]+", ), # Generic password patterns (conservative to avoid false positives) "password_assignment": re.compile( r"(?i)(password|passwd|pwd)[\s=:\"']+([^\s\"']{8,})", ), # Generic secret patterns "secret_assignment": re.compile( r"(?i)(secret|token|credential)[\s=:\"']+([a-zA-Z0-9_\-\.]{20,})", ), } @dataclass class SecretMatch: """Represents a detected secret.""" secret_type: str """Type of secret detected (e.g., 'api_key', 'aws_access_key')""" position: int """Character position in the text where secret was found""" context: str """Surrounding context (truncated, with secret partially redacted)""" def detect_secrets( text: str, *, max_matches: int = 10, context_chars: int = 40, ) -> list[SecretMatch]: """Detect potential secrets in text using pattern matching. This function scans text for common secret patterns like API keys, tokens, and passwords. It's designed to be conservative to minimize false positives while catching real secrets. Args: text: Text to scan for secrets max_matches: Maximum number of matches to return (default: 10) context_chars: Number of context characters to include (default: 40) Returns: List of SecretMatch objects for detected secrets Examples: >>> text = "My API key is sk-1234567890abcdef" >>> matches = detect_secrets(text) >>> matches[0].secret_type 'openai_key' """ matches: list[SecretMatch] = [] for secret_type, pattern in PATTERNS.items(): for match in pattern.finditer(text): if len(matches) >= max_matches: break # Get context around the match start = max(0, match.start() - context_chars) end = min(len(text), match.end() + context_chars) context = text[start:end] # Partially redact the matched secret in context matched_text = match.group(0) if len(matched_text) > 10: redacted = matched_text[:4] + "..." 
+ matched_text[-4:] else: redacted = "***" context = context.replace(matched_text, redacted) matches.append( SecretMatch( secret_type=secret_type, position=match.start(), context=context.strip(), ) ) if len(matches) >= max_matches: break return matches def scan_file_for_secrets( file_path: str, *, max_matches: int = 10, ) -> list[SecretMatch]: """Scan a file for potential secrets. Args: file_path: Path to file to scan max_matches: Maximum number of matches to return Returns: List of SecretMatch objects for detected secrets Raises: FileNotFoundError: If file doesn't exist PermissionError: If file can't be read """ try: with open(file_path, encoding="utf-8", errors="ignore") as f: content = f.read() except FileNotFoundError as e: raise FileNotFoundError(f"File not found: {file_path}") from e except PermissionError as e: raise PermissionError(f"Permission denied reading file: {file_path}") from e return detect_secrets(content, max_matches=max_matches) def format_secret_warning(matches: list[SecretMatch]) -> str: """Format a user-friendly warning message about detected secrets. Args: matches: List of detected secrets Returns: Formatted warning message Examples: >>> matches = [SecretMatch("api_key", 10, "key = sk-...")] >>> print(format_secret_warning(matches)) ⚠️ WARNING: Detected 1 potential secret in content! ... """ if not matches: return "" lines = [ f"⚠️ WARNING: Detected {len(matches)} potential secret{'s' if len(matches) != 1 else ''} in content!", "", "Secret types detected:", ] # Group by type by_type: dict[str, int] = {} for match in matches: by_type[match.secret_type] = by_type.get(match.secret_type, 0) + 1 for secret_type, count in sorted(by_type.items()): lines.append(f" - {secret_type}: {count}") lines.extend( [ "", "This content may contain sensitive information that should not be stored.", "Consider:", " 1. Using environment variables for secrets", " 2. Using a secrets manager (AWS Secrets Manager, HashiCorp Vault, etc.)", " 3. 
Removing secrets from memory content", "", "To disable secrets detection, set CORTEXGRAPH_DETECT_SECRETS=false in your .env file.", ] ) return "\n".join(lines) def should_warn_about_secrets( matches: list[SecretMatch], *, min_confidence_types: set[str] | None = None, ) -> bool: """Determine if we should warn the user about detected secrets. This function applies heuristics to reduce false positives. For example, we might be more confident about AWS keys than generic "api_key" patterns. Args: matches: List of detected secrets min_confidence_types: Set of secret types we're very confident about (default: AWS, GitHub, OpenAI, Anthropic, etc.) Returns: True if we should warn the user, False otherwise """ if not matches: return False # High-confidence secret types (very unlikely to be false positives) if min_confidence_types is None: min_confidence_types = { "aws_access_key", "aws_secret_key", "github_token", "github_classic", "openai_key", "anthropic_key", "google_api_key", "slack_token", "private_key", "database_url", "jwt_token", } # Warn if ANY high-confidence type detected for match in matches: if match.secret_type in min_confidence_types: return True # For lower-confidence types, only warn if multiple matches return len(matches) >= 2 def redact_secrets(text: str, replacement: str = "***REDACTED***") -> str: """Redact detected secrets from text for safe logging. This function is useful for sanitizing log messages to prevent accidental secret exposure in logs. 
Args: text: Text to redact secrets from replacement: Replacement string for secrets (default: "***REDACTED***") Returns: Text with secrets replaced by redaction string Examples: >>> text = "My API key is sk-1234567890" >>> redact_secrets(text) 'My API key is ***REDACTED***' """ redacted = text for pattern in PATTERNS.values(): redacted = pattern.sub(replacement, redacted) return redacted def main() -> int: """CLI entry point for secrets detection.""" import argparse import sys parser = argparse.ArgumentParser(description="Scan text or files for potential secrets") parser.add_argument( "input", nargs="?", help="File to scan (if omitted, reads from stdin)", ) parser.add_argument( "--max-matches", type=int, default=10, help="Maximum matches to report (default: 10)", ) parser.add_argument( "--quiet", action="store_true", help="Only output if secrets found (exit code 1)", ) parser.add_argument( "--redact", action="store_true", help="Output text with secrets redacted", ) args = parser.parse_args() try: # Read input if args.input: matches = scan_file_for_secrets(args.input, max_matches=args.max_matches) with open(args.input, encoding="utf-8", errors="ignore") as f: content = f.read() else: content = sys.stdin.read() matches = detect_secrets(content, max_matches=args.max_matches) # Redact mode if args.redact: print(redact_secrets(content)) return 0 # Report mode if matches: if not args.quiet: print(format_secret_warning(matches), file=sys.stderr) return 1 else: if not args.quiet: print("✓ No secrets detected", file=sys.stderr) return 0 except Exception as e: print(f"Error: {e}", file=sys.stderr) return 1 if __name__ == "__main__": import sys sys.exit(main())

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/prefrontalsys/mnemex'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.