firewalla-mcp-server

enhanced-correlation.ts•24.5 KiB

/** * Enhanced Correlation Algorithms with Scoring and Fuzzy Matching * Provides intelligent correlation scoring and flexible matching strategies * * Weight Handling Logic: * ===================== * * Field weights control the importance of each field in correlation scoring. * The weight fallback hierarchy is: * * 1. Explicit field weight: weights[field] - Used if it's a valid number (including 0) * 2. Default weight: weights.default - Used if field weight is invalid * 3. Hardcoded fallback: 0.5 - Used as last resort * * Special Weight Values: * - 0: Field is completely ignored (not included in weighted average calculation) * - 0.1 to 1.0: Field contributes to scoring with given weight * - null/undefined/false/string: Invalid, falls back to default or 0.5 * * Examples: * - weights = { source_ip: 0 } → source_ip ignored completely * - weights = { source_ip: undefined } → uses default weight * - weights = { source_ip: 0.8 } → source_ip weighted at 80% importance * * Backward Compatibility: * - Existing code using nullish coalescing (??) continues to work * - Zero weights now properly excluded from calculations * - Invalid weights (non-numbers) properly fall back to defaults */ import { getFieldValue, normalizeFieldValue, type EntityType, type MappableEntity, type FieldValue } from './field-mapper.js'; /** * Utility function for consistent rounding to 3 decimal places */ function roundScore(score: number): number { return Math.round(score * 1000) / 1000; } /** * Validate that a correlation score is within expected bounds */ function validateScore(score: number, context: string): number { if (score < 0 || score > 1 || isNaN(score)) { throw new Error(`Invalid correlation score ${score} in ${context}. Scores must be between 0 and 1.`); } return roundScore(score); } /** * Configuration for correlation scoring weights */ export type CorrelationWeights = Record<string, number>; /** * Validates and resolves field weight with proper fallback handling * * @param field - The field name to get weight for * @param weights - The weights configuration object * @returns Validated weight value between 0 and 1 */ export function resolveFieldWeight(field: string, weights: CorrelationWeights): number { let fieldWeight: number; // Check if field weight is explicitly defined (including zero) if (Object.prototype.hasOwnProperty.call(weights, field) && typeof weights[field] === 'number' && Number.isFinite(weights[field])) { fieldWeight = weights[field]; } else if (Object.prototype.hasOwnProperty.call(weights, 'default') && typeof weights.default === 'number' && Number.isFinite(weights.default)) { fieldWeight = weights.default; } else { fieldWeight = 0.5; // Final fallback } // Clamp to valid range [0, 1] return Math.max(0, Math.min(1, fieldWeight)); } /** * Default field weights for correlation scoring */ export const DEFAULT_CORRELATION_WEIGHTS: CorrelationWeights = { // Network identifiers (high confidence) 'source_ip': 1.0, 'destination_ip': 1.0, 'device_ip': 1.0, 'device_id': 1.0, 'gid': 1.0, // Protocol and network details (high confidence) 'protocol': 0.9, 'port': 0.8, 'asn': 0.8, // Geographic fields (medium-high confidence) 'country': 0.7, 'region': 0.6, 'city': 0.5, // Application fields (medium confidence) 'application': 0.7, 'user_agent': 0.6, 'ssl_subject': 0.8, 'ssl_issuer': 0.8, // Behavioral patterns (medium confidence) 'session_duration': 0.5, 'frequency_score': 0.6, 'bytes_per_session': 0.5, 'connection_pattern': 0.6, // Temporal fields (lower confidence due to time variance) 'timestamp': 0.4, 'hour_of_day': 0.3, 'day_of_week': 0.2, // Default weight for unspecified fields (fallback for unknown field types) 'default': 0.5 }; /** * Fuzzy matching configuration */ export interface FuzzyMatchConfig { enabled: boolean; stringThreshold: number; // 0.0-1.0, higher = more strict ipSubnetMatching: boolean; numericTolerance: number; // percentage tolerance for numeric values geographicRadius: number; // km radius for geographic fuzzy matching } /** * Default fuzzy matching configuration */ export const DEFAULT_FUZZY_CONFIG: FuzzyMatchConfig = { enabled: true, stringThreshold: 0.8, ipSubnetMatching: true, numericTolerance: 0.1, // 10% tolerance geographicRadius: 50 // 50km radius }; /** * Enhanced correlation result with scoring */ export interface ScoredCorrelationResult { entity: MappableEntity; correlationScore: number; fieldScores: Record<string, number>; fieldMatchTypes: Record<string, 'exact' | 'fuzzy' | 'partial'>; matchType: 'exact' | 'fuzzy' | 'partial'; confidence: 'high' | 'medium' | 'low'; } /** * Enhanced correlation statistics with scoring details */ export interface EnhancedCorrelationStats { totalSecondaryResults: number; correlatedResults: number; averageScore: number; scoreDistribution: { high: number; // score >= 0.8 medium: number; // score >= 0.5 low: number; // score < 0.5 }; fieldStatistics: Record<string, { exactMatches: number; fuzzyMatches: number; partialMatches: number; averageScore: number; }>; fuzzyMatchingEnabled: boolean; totalProcessingTime: number; } /** * Perform enhanced multi-field correlation with scoring and fuzzy matching */ export function performEnhancedCorrelation( primaryResults: MappableEntity[], secondaryResults: MappableEntity[], primaryType: EntityType, secondaryType: EntityType, correlationFields: string[], correlationType: 'AND' | 'OR', weights: CorrelationWeights = DEFAULT_CORRELATION_WEIGHTS, fuzzyConfig: FuzzyMatchConfig = DEFAULT_FUZZY_CONFIG, minimumScore: number = 0.3 ): { correlatedResults: ScoredCorrelationResult[]; stats: EnhancedCorrelationStats } { const startTime = Date.now(); const correlatedResults: ScoredCorrelationResult[] = []; // Extract correlation values from primary results for each field const primaryFieldValues = correlationFields.map(field => extractFieldValuesWithMetadata(primaryResults, field, primaryType) ); // Score each secondary result for (const secondaryItem of secondaryResults) { const correlationResult = scoreEntityCorrelation( secondaryItem, primaryFieldValues, correlationFields, secondaryType, correlationType, weights, fuzzyConfig ); if (correlationResult.correlationScore >= minimumScore) { correlatedResults.push(correlationResult); } } // Sort by correlation score (descending) correlatedResults.sort((a, b) => b.correlationScore - a.correlationScore); // Generate enhanced statistics const stats = generateEnhancedStats( correlatedResults, secondaryResults.length, correlationFields, fuzzyConfig.enabled, Date.now() - startTime ); return { correlatedResults, stats }; } /** * Extract field values with additional metadata for scoring */ function extractFieldValuesWithMetadata( results: MappableEntity[], field: string, entityType: EntityType ): { values: Set<unknown>; metadata: Map<unknown, { count: number; quality: number }> } { const values = new Set<unknown>(); const metadata = new Map<unknown, { count: number; quality: number }>(); for (const entity of results) { const value = getFieldValue(entity, field, entityType); if (value !== undefined && value !== null && value !== '') { const normalizedValue = normalizeFieldValue(value, field); values.add(normalizedValue); // Track value occurrence and assess data quality const existing = metadata.get(normalizedValue) || { count: 0, quality: 1.0 }; existing.count += 1; // Assess data quality based on completeness and format const quality = assessDataQuality(value, field); existing.quality = Math.max(existing.quality, quality); metadata.set(normalizedValue, existing); } } return { values, metadata }; } /** * Score a single entity's correlation against primary results */ function scoreEntityCorrelation( entity: MappableEntity, primaryFieldValues: Array<{ values: Set<unknown>; metadata: Map<unknown, { count: number; quality: number }> }>, correlationFields: string[], entityType: EntityType, correlationType: 'AND' | 'OR', weights: CorrelationWeights, fuzzyConfig: FuzzyMatchConfig ): ScoredCorrelationResult { const fieldScores: Record<string, number> = {}; const fieldMatchTypes: Record<string, 'exact' | 'fuzzy' | 'partial'> = {}; let totalWeightedScore = 0; let totalWeight = 0; let exactMatches = 0; let fuzzyMatches = 0; // Score each correlation field for (let i = 0; i < correlationFields.length; i++) { const field = correlationFields[i]; // Resolve field weight using the centralized validation logic const fieldWeight = resolveFieldWeight(field, weights); // Skip processing if weight is zero (field should be ignored completely) if (fieldWeight === 0) { fieldScores[field] = 0; fieldMatchTypes[field] = 'partial'; // Important: Don't add to totalWeight when weight is zero continue; } const primaryValues = primaryFieldValues[i]; const entityValue = getFieldValue(entity, field, entityType); if (entityValue === undefined || entityValue === null) { fieldScores[field] = 0; fieldMatchTypes[field] = 'partial'; totalWeight += fieldWeight; continue; } const normalizedValue = normalizeFieldValue(entityValue, field); // Calculate field correlation score const fieldScore = calculateFieldScore( normalizedValue, primaryValues, field, fuzzyConfig ); fieldScores[field] = fieldScore.score; fieldMatchTypes[field] = fieldScore.matchType; totalWeightedScore += fieldScore.score * fieldWeight; totalWeight += fieldWeight; // Track match types if (fieldScore.matchType === 'exact') {exactMatches++;} else if (fieldScore.matchType === 'fuzzy') {fuzzyMatches++;} } // Calculate overall correlation score const correlationScore = totalWeight > 0 ? totalWeightedScore / totalWeight : 0; // Correlation Penalty Logic: Adjust scores based on correlation type let finalScore = correlationScore; if (correlationType === 'AND') { // AND Correlation Penalty Strategy: // // Problem: AND correlations should require ALL fields to match for high confidence. // A simple average might give high scores even when some fields don't match. // // Solution: Apply a "completeness penalty" that multiplies the base score by // the ratio of matching fields to total fields. This creates exponential // penalty for missing matches: // // Examples: // - 3/3 fields match: 100% score (no penalty) // - 2/3 fields match: 67% of base score (33% penalty) // - 1/3 fields match: 33% of base score (67% penalty) // - 0/3 fields match: 0% score (100% penalty) // // This ensures AND correlations have stringent requirements while still // allowing partial matches to receive proportionally lower scores. const matchingFields = Object.values(fieldScores).filter(score => score > 0).length; const completeness = matchingFields / correlationFields.length; finalScore = correlationScore * completeness; // Note: OR correlations use the base weighted average without penalty, // as they should succeed when ANY field matches strongly. } // Determine match type and confidence const matchType = exactMatches > 0 ? 'exact' : fuzzyMatches > 0 ? 'fuzzy' : 'partial'; const confidence = finalScore >= 0.8 ? 'high' : finalScore >= 0.5 ? 'medium' : 'low'; return { entity, correlationScore: validateScore(finalScore, `entity correlation for ${correlationFields.join(', ')}`), fieldScores, fieldMatchTypes, matchType, confidence }; } /** * Calculate correlation score for a specific field */ function calculateFieldScore( entityValue: unknown, primaryValues: { values: Set<unknown>; metadata: Map<unknown, { count: number; quality: number }> }, field: string, fuzzyConfig: FuzzyMatchConfig ): { score: number; matchType: 'exact' | 'fuzzy' | 'partial' } { // Check for exact match first if (primaryValues.values.has(entityValue)) { // Cap exact matches at 1.0 to maintain scoring consistency // Quality information could be preserved separately if needed return { score: 1.0, matchType: 'exact' }; } // Try fuzzy matching if enabled if (fuzzyConfig.enabled) { const fuzzyScore = calculateFuzzyScore(entityValue, primaryValues.values, field, fuzzyConfig); if (fuzzyScore > 0) { return { score: fuzzyScore, matchType: 'fuzzy' }; } } // No match found return { score: 0, matchType: 'partial' }; } /** * Calculate fuzzy matching score */ function calculateFuzzyScore( entityValue: unknown, primaryValues: Set<unknown>, field: string, fuzzyConfig: FuzzyMatchConfig ): number { let bestScore = 0; for (const primaryValue of primaryValues) { let score = 0; // IP address subnet matching if (field.includes('ip') && fuzzyConfig.ipSubnetMatching && typeof entityValue === 'string' && typeof primaryValue === 'string') { score = calculateIPSimilarity(entityValue, primaryValue); } // String similarity matching else if (typeof entityValue === 'string' && typeof primaryValue === 'string') { score = calculateStringSimilarity(entityValue, primaryValue, fuzzyConfig.stringThreshold); } // Numeric tolerance matching else if (typeof entityValue === 'number' && typeof primaryValue === 'number') { score = calculateNumericSimilarity(entityValue, primaryValue, fuzzyConfig.numericTolerance); } // Geographic proximity matching else if (field.includes('geo') || field === 'country' || field === 'city') { score = calculateGeographicSimilarity(entityValue, primaryValue); } bestScore = Math.max(bestScore, score); } return bestScore; } /** * Validate IPv4 address format */ function isValidIPv4Address(ip: string): boolean { const ipv4Regex = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/; const match = ip.match(ipv4Regex); if (!match) {return false;} // Check that each octet is 0-255 for (let i = 1; i <= 4; i++) { const octet = parseInt(match[i], 10); if (octet < 0 || octet > 255) {return false;} } return true; } /** * Calculate IP address similarity (subnet matching) */ /** * Calculates similarity between two IP addresses using subnet matching * * @param ip1 - First IP address to compare * @param ip2 - Second IP address to compare * @returns Similarity score between 0.0 and 1.0, where 1.0 is exact match */ export function calculateIPSimilarity(ip1: string, ip2: string): number { if (typeof ip1 !== 'string' || typeof ip2 !== 'string') {return 0;} // Validate IP address format before processing if (!isValidIPv4Address(ip1) || !isValidIPv4Address(ip2)) {return 0;} const parts1 = ip1.split('.'); const parts2 = ip2.split('.'); if (parts1.length !== 4 || parts2.length !== 4) {return 0;} let matchingOctets = 0; for (let i = 0; i < 4; i++) { if (parts1[i] === parts2[i]) { matchingOctets++; } else { break; // Subnet matching stops at first different octet } } // Score based on subnet size: /8=0.25, /16=0.5, /24=0.75, exact=1.0 return matchingOctets * 0.25; } /** * Calculate string similarity using Levenshtein distance */ /** * Calculates string similarity using Levenshtein distance algorithm * * @param str1 - First string to compare * @param str2 - Second string to compare * @param threshold - Minimum similarity threshold (0.0 to 1.0) * @returns Similarity score between 0.0 and 1.0, where 1.0 is exact match */ export function calculateStringSimilarity(str1: string, str2: string, threshold: number): number { if (str1 === str2) {return 1.0;} const maxLength = Math.max(str1.length, str2.length); if (maxLength === 0) {return 1.0;} const distance = levenshteinDistance(str1.toLowerCase(), str2.toLowerCase()); const similarity = 1 - (distance / maxLength); return similarity >= threshold ? similarity * 0.8 : 0; // Cap fuzzy string matches at 0.8 } /** * Calculate Levenshtein distance between two strings */ function levenshteinDistance(str1: string, str2: string): number { const matrix = Array(str2.length + 1).fill(null).map(() => Array(str1.length + 1).fill(null)); for (let i = 0; i <= str1.length; i++) {matrix[0][i] = i;} for (let j = 0; j <= str2.length; j++) {matrix[j][0] = j;} for (let j = 1; j <= str2.length; j++) { for (let i = 1; i <= str1.length; i++) { const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1; matrix[j][i] = Math.min( matrix[j][i - 1] + 1, // deletion matrix[j - 1][i] + 1, // insertion matrix[j - 1][i - 1] + indicator // substitution ); } } return matrix[str2.length][str1.length]; } /** * Calculates similarity between two numeric values with tolerance * * @param num1 - First number to compare * @param num2 - Second number to compare * @param tolerance - Acceptable tolerance for considering values similar (0.0 to 1.0) * @returns Similarity score between 0.0 and 1.0, where 1.0 is exact match */ export function calculateNumericSimilarity(num1: number, num2: number, tolerance: number): number { const diff = Math.abs(num1 - num2); const maxValue = Math.max(Math.abs(num1), Math.abs(num2)); if (maxValue === 0) {return num1 === num2 ? 1.0 : 0;} const relativeDiff = diff / maxValue; if (relativeDiff <= tolerance) { return Math.max(0, 1 - (relativeDiff / tolerance)) * 0.7; // Cap numeric fuzzy at 0.7 } return 0; } /** * Calculate geographic similarity (simplified) */ function calculateGeographicSimilarity(geo1: unknown, geo2: unknown): number { // Simple string-based geographic similarity if (typeof geo1 === 'string' && typeof geo2 === 'string') { return calculateStringSimilarity(geo1, geo2, 0.7) * 0.6; // Cap geo fuzzy at 0.6 } return 0; } /** * Assess data quality for scoring bonus */ function assessDataQuality(value: unknown, field: string): number { let quality = 1.0; // Penalize empty or default values if (!value || value === '' || value === '0.0.0.0' || value === 'unknown' || value === '127.0.0.1' || value === '255.255.255.255' || value === '::1' || value === '0.0.0.0/0' || value === 'localhost') { quality -= 0.3; } // Bonus for well-formatted data if (field.includes('ip') && typeof value === 'string') { const ipRegex = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/; if (ipRegex.test(value)) {quality += 0.1;} } return Math.max(0, Math.min(1.0, quality)); } /** * Generate enhanced correlation statistics */ function generateEnhancedStats( correlatedResults: ScoredCorrelationResult[], totalSecondaryResults: number, correlationFields: string[], fuzzyEnabled: boolean, processingTime: number ): EnhancedCorrelationStats { const averageScore = correlatedResults.length > 0 ? correlatedResults.reduce((sum, result) => sum + result.correlationScore, 0) / correlatedResults.length : 0; // Score distribution const scoreDistribution = { high: correlatedResults.filter(r => r.correlationScore >= 0.8).length, medium: correlatedResults.filter(r => r.correlationScore >= 0.5 && r.correlationScore < 0.8).length, low: correlatedResults.filter(r => r.correlationScore < 0.5).length }; // Field statistics const fieldStatistics: Record<string, any> = {}; for (const field of correlationFields) { const exactMatches = correlatedResults.filter(r => r.fieldMatchTypes[field] === 'exact').length; const fuzzyMatches = correlatedResults.filter(r => r.fieldMatchTypes[field] === 'fuzzy').length; const partialMatches = correlatedResults.filter(r => r.fieldMatchTypes[field] === 'partial').length; const fieldScores = correlatedResults .map(r => r.fieldScores[field] || 0) .filter(score => score > 0); const averageFieldScore = fieldScores.length > 0 ? fieldScores.reduce((sum, score) => sum + score, 0) / fieldScores.length : 0; fieldStatistics[field] = { exactMatches, fuzzyMatches, partialMatches, averageScore: roundScore(averageFieldScore) }; } return { totalSecondaryResults, correlatedResults: correlatedResults.length, averageScore: roundScore(averageScore), scoreDistribution, fieldStatistics, fuzzyMatchingEnabled: fuzzyEnabled, totalProcessingTime: processingTime }; } /** * Type guard to check if a value is a valid FieldValue */ function isValidFieldValue(value: unknown): value is FieldValue { return typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean' || value === null || value === undefined; } /** * Simple client-side correlation function that matches entities based on a single field * This function provides basic correlation without API calls * * @param primaryResults - Array of primary results (e.g., flows) * @param secondaryResults - Array of secondary results (e.g., alarms) * @param correlationField - Field name to correlate on (e.g., 'source_ip') * @returns Array of correlated results with both primary and secondary data */ export function correlateResults( primaryResults: MappableEntity[], secondaryResults: MappableEntity[], correlationField: string ): Array<{ primary: MappableEntity; secondary: MappableEntity; correlationType: 'exact' | 'fuzzy'; correlationScore: number; }> { const correlatedResults: Array<{ primary: MappableEntity; secondary: MappableEntity; correlationType: 'exact' | 'fuzzy'; correlationScore: number; }> = []; // Build a map of primary values for efficient lookup const primaryValueMap = new Map<unknown, MappableEntity[]>(); for (const primaryItem of primaryResults) { // Get field value - supports nested paths like 'remote.ip' const value = getNestedFieldValue(primaryItem, correlationField); if (value !== undefined && value !== null && value !== '' && isValidFieldValue(value)) { const normalizedValue = normalizeFieldValue(value, correlationField); if (!primaryValueMap.has(normalizedValue)) { primaryValueMap.set(normalizedValue, []); } primaryValueMap.get(normalizedValue)!.push(primaryItem); } } // Correlate secondary results for (const secondaryItem of secondaryResults) { const secondaryValue = getNestedFieldValue(secondaryItem, correlationField); if (secondaryValue !== undefined && secondaryValue !== null && secondaryValue !== '' && isValidFieldValue(secondaryValue)) { const normalizedSecondaryValue = normalizeFieldValue(secondaryValue, correlationField); // Check for exact match first if (primaryValueMap.has(normalizedSecondaryValue)) { const matchingPrimaries = primaryValueMap.get(normalizedSecondaryValue)!; for (const primary of matchingPrimaries) { correlatedResults.push({ primary, secondary: secondaryItem, correlationType: 'exact', correlationScore: 1.0 }); } } else if (correlationField.includes('ip')) { // For IP fields, try fuzzy subnet matching for (const [primaryValue, primaryItems] of primaryValueMap.entries()) { if (typeof primaryValue === 'string' && typeof normalizedSecondaryValue === 'string') { const similarity = calculateIPSimilarity(normalizedSecondaryValue, primaryValue); if (similarity >= 0.5) { // At least /16 subnet match for (const primary of primaryItems) { correlatedResults.push({ primary, secondary: secondaryItem, correlationType: 'fuzzy', correlationScore: similarity }); } } } } } } } // Sort by correlation score (highest first) correlatedResults.sort((a, b) => b.correlationScore - a.correlationScore); return correlatedResults; } /** * Get nested field value from an object using dot notation * Handles paths like 'remote.ip' or 'device.mac' */ function getNestedFieldValue(obj: any, path: string): unknown { const parts = path.split('.'); let current = obj; for (const part of parts) { if (current && typeof current === 'object' && part in current) { current = current[part]; } else { return undefined; } } return current; }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/amittell/firewalla-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

enhanced-correlation.ts•24.5 KiB