credential-free

entropy.py•4.07 KiB

"""
Shannon Entropy Calculator for Secret Detection

High entropy strings are likely to be secrets
"""

import math
import re
from collections import Counter

# Base64 alphabet (A-Z, a-z, 0-9, +, /) plus the "=" padding character.
# Built once at import time instead of on every call.
_BASE64_CHARS = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
)

# Patterns that hint a string is structured rather than random.
_REPEATING_RE = re.compile(r'(.)\1{3,}')
_SEQUENTIAL_RE = re.compile(r'(abc|bcd|cde|def|012|123|234|345)', re.IGNORECASE)
_COMMON_WORD_RE = re.compile(r'(test|demo|example|sample|key|secret)', re.IGNORECASE)

# Entropy categories, checked from the highest lower bound downwards.
_ENTROPY_CATEGORIES = (
    (5.5, "VERY_HIGH"),
    (4.5, "HIGH"),
    (3.5, "MEDIUM"),
)


def calculate_shannon_entropy(data: str) -> float:
    """
    Calculate Shannon entropy of a string

    Args:
        data: String to calculate entropy for

    Returns:
        Entropy in bits per character (higher = more random). 0.0 for an
        empty string; never more than log2 of the number of distinct
        characters, so it stays within 0-8 only while the input uses at
        most 256 distinct characters.
    """
    if not data:
        return 0.0

    # Count character frequencies
    counter = Counter(data)
    length = len(data)

    # Accumulate -p * log2(p) over every distinct character
    entropy = 0.0
    for count in counter.values():
        probability = count / length
        entropy -= probability * math.log2(probability)

    return entropy


def calculate_base64_entropy(data: str) -> float:
    """
    Calculate entropy specifically for base64-like strings

    Args:
        data: String to analyze

    Returns:
        Shannon entropy of ``data`` when every character belongs to the
        base64 alphabet (including "=" padding); 0.0 otherwise, and 0.0
        for an empty string.
    """
    # Anything outside the alphabet means this is not base64
    if not _BASE64_CHARS.issuperset(data):
        return 0.0

    return calculate_shannon_entropy(data)


def is_high_entropy_string(data: str, threshold: float = 4.5) -> bool:
    """
    Check if a string has high entropy (likely a secret)

    Args:
        data: String to check
        threshold: Minimum entropy value (default 4.5)

    Returns:
        True if the entropy is at least ``threshold``, False otherwise
    """
    return calculate_shannon_entropy(data) >= threshold


def extract_high_entropy_strings(text: str, min_length: int = 20, threshold: float = 4.5) -> list[dict]:
    """
    Extract all high-entropy strings from text

    Candidates are maximal runs of the characters ``A-Za-z0-9+/=_-`` that
    are at least ``min_length`` characters long.

    Args:
        text: Text to search
        min_length: Minimum string length to consider; values below 1 are
            treated as 1
        threshold: Minimum entropy threshold

    Returns:
        List of dicts, in order of appearance, with keys 'string',
        'entropy' (rounded to 2 decimals), 'start', 'end' and 'length'
    """
    # The bound is spliced into a "{N,}" quantifier. N = 0 would yield
    # empty matches between candidates, and a negative N is not a valid
    # quantifier at all (re reads it as literal text and nothing matches),
    # so clamp it to at least one character.
    lower_bound = max(1, int(min_length))
    pattern = re.compile(r'[A-Za-z0-9+/=_\-]{' + str(lower_bound) + r',}')

    findings = []
    for match in pattern.finditer(text):
        string = match.group()
        entropy = calculate_shannon_entropy(string)

        # The comparison uses the unrounded entropy; only the report is rounded
        if entropy >= threshold:
            findings.append({
                'string': string,
                'entropy': round(entropy, 2),
                'start': match.start(),
                'end': match.end(),
                'length': len(string),
            })

    return findings


def get_entropy_category(entropy: float) -> str:
    """
    Categorize entropy level

    Args:
        entropy: Entropy value

    Returns:
        "VERY_HIGH" (>= 5.5), "HIGH" (>= 4.5), "MEDIUM" (>= 3.5) or "LOW"
    """
    for lower_bound, label in _ENTROPY_CATEGORIES:
        if entropy >= lower_bound:
            return label
    return "LOW"


def analyze_string_randomness(data: str, threshold: float = 4.5) -> dict:
    """
    Comprehensive analysis of string randomness

    Args:
        data: String to analyze
        threshold: Minimum entropy for 'likely_secret' (default 4.5)

    Returns:
        Dict with entropy metrics:
            entropy: Shannon entropy rounded to 2 decimals
            category: Result of get_entropy_category
            diversity: Distinct characters / total characters (0 if empty)
            has_repeating_chars: Some character occurs 4+ times in a row
            has_sequential_pattern: Contains a short run such as "abc" or "123"
            has_common_words: Contains test/demo/example/sample/key/secret
            likely_secret: Entropy reaches ``threshold`` and neither
                repeated characters nor common words were found
            confidence: (entropy / 8) * diversity as a percentage,
                capped at 100
    """
    entropy = calculate_shannon_entropy(data)

    # Check for patterns that reduce randomness
    has_repeating_chars = bool(_REPEATING_RE.search(data))
    has_sequential = bool(_SEQUENTIAL_RE.search(data))
    has_pattern = bool(_COMMON_WORD_RE.search(data))

    # Character diversity
    unique_chars = len(set(data))
    total_chars = len(data)
    diversity = unique_chars / total_chars if total_chars > 0 else 0

    # Entropy can exceed 8 bits for inputs with more than 256 distinct
    # characters, so cap the percentage at 100.
    confidence = min(100.0, (entropy / 8.0) * diversity * 100)

    return {
        'entropy': round(entropy, 2),
        'category': get_entropy_category(entropy),
        'diversity': round(diversity, 2),
        'has_repeating_chars': has_repeating_chars,
        'has_sequential_pattern': has_sequential,
        'has_common_words': has_pattern,
        'likely_secret': entropy >= threshold and not (has_repeating_chars or has_pattern),
        'confidence': round(confidence, 1),
    }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/skutry/credential-free'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

entropy.py•4.07 KiB