// resource-similarity-service.ts
import { NodeRepository } from '../database/node-repository';
import { logger } from '../utils/logger';
import { ValidationServiceError } from '../errors/validation-service-error';
/**
 * A single "did you mean …?" candidate returned by
 * {@link ResourceSimilarityService.findSimilarResources}.
 */
export interface ResourceSuggestion {
  /** The valid resource value being suggested in place of the invalid one. */
  value: string;
  /** Match score; higher means a closer match. Results are sorted by this, descending. */
  confidence: number;
  /** Human-readable explanation of why this value was suggested. */
  reason: string;
  /** Operation values collected for the suggested resource, when they are known. */
  availableOperations?: string[];
}
/**
 * A known, commonly made resource-name mistake and the value that should be
 * used instead.
 */
interface ResourcePattern {
  /** The mistaken resource name; compared case-insensitively against the user's input. */
  pattern: string;
  /** The resource value to suggest; only emitted when the node actually exposes it. */
  suggestion: string;
  /** Confidence copied onto the resulting suggestion. */
  confidence: number;
  /** Explanation copied onto the resulting suggestion. */
  reason: string;
}
/**
 * Suggests valid resource names for a node when the caller supplied an
 * invalid one.
 *
 * Suggestions come from three sources, in priority order:
 *  1. a table of known mistakes (e.g. "files" instead of "file"),
 *  2. plural/singular variants of the input,
 *  3. string similarity (substring containment and Levenshtein distance).
 *
 * Both the per-node resource list and the computed suggestions are cached in
 * memory. Resource lists expire after {@link CACHE_DURATION_MS}; suggestions
 * derived from a resource list are dropped when that list expires or is
 * refreshed.
 */
export class ResourceSimilarityService {
  private static readonly CACHE_DURATION_MS = 5 * 60 * 1000; // 5 minutes
  private static readonly MIN_CONFIDENCE = 0.3; // 30% minimum confidence to suggest
  private static readonly MAX_SUGGESTIONS = 5;
  /** Suggestion cache is trimmed once it grows beyond this many entries… */
  private static readonly SUGGESTION_CACHE_LIMIT = 100;
  /** …down to this many of the most recently inserted entries. */
  private static readonly SUGGESTION_CACHE_KEEP = 50;
  /** Confidence assigned to a pure plural/singular mismatch. */
  private static readonly INFLECTION_CONFIDENCE = 0.9;
  /** Probability that a lookup also triggers a cache sweep. */
  private static readonly CLEANUP_PROBABILITY = 0.1;
  /** Word endings that take "es" rather than "s" in the plural. */
  private static readonly SIBILANT_ENDING = /(?:s|x|z|ch|sh)$/;

  // Confidence thresholds for better code clarity
  private static readonly CONFIDENCE_THRESHOLDS = {
    EXACT: 1.0,
    VERY_HIGH: 0.95,
    HIGH: 0.8,
    MEDIUM: 0.6,
    MIN_SUBSTRING: 0.7
  } as const;

  /**
   * Maps substrings of a node type to a pattern group in `commonPatterns`.
   * Order matters: the first entry with a matching marker wins.
   */
  private static readonly NODE_PATTERN_GROUPS: ReadonlyArray<{ markers: readonly string[]; group: string }> = [
    { markers: ['googleDrive'], group: 'googleDrive' },
    { markers: ['slack'], group: 'slack' },
    { markers: ['postgres', 'mysql', 'mongodb'], group: 'database' },
    { markers: ['googleSheets'], group: 'googleSheets' },
    { markers: ['gmail', 'email'], group: 'email' },
  ];

  /**
   * Keywords in operation names that hint at an implicit resource.
   * Order matters: the first entry matched by any operation wins.
   */
  private static readonly IMPLICIT_RESOURCE_HINTS: ReadonlyArray<{ keywords: readonly string[]; resource: string }> = [
    { keywords: ['file', 'upload', 'download'], resource: 'file' },
    { keywords: ['folder', 'directory'], resource: 'folder' },
    { keywords: ['message', 'send', 'reply'], resource: 'message' },
    { keywords: ['channel', 'broadcast'], resource: 'channel' },
    { keywords: ['user', 'member'], resource: 'user' },
    { keywords: ['table', 'row', 'column'], resource: 'table' },
    { keywords: ['document', 'doc'], resource: 'document' },
  ];

  private repository: NodeRepository;
  /** Resource lists keyed by node type, with the time they were fetched. */
  private resourceCache: Map<string, { resources: any[], timestamp: number }> = new Map();
  /** Full (untruncated) sorted suggestion lists keyed by `${nodeType}:${invalidResource}`. */
  private suggestionCache: Map<string, ResourceSuggestion[]> = new Map();
  private commonPatterns: Map<string, ResourcePattern[]>;

  constructor(repository: NodeRepository) {
    this.repository = repository;
    this.commonPatterns = this.initializeCommonPatterns();
  }

  /**
   * Clean up expired cache entries to prevent memory leaks.
   *
   * Expired resource lists are removed together with the suggestions that
   * were computed from them. Independently of that, the suggestion cache is
   * trimmed to its most recently inserted entries once it grows too large.
   */
  private cleanupExpiredEntries(): void {
    const now = Date.now();

    // Deleting from a Map while iterating it is well-defined in JavaScript.
    for (const [nodeType, entry] of this.resourceCache) {
      if (now - entry.timestamp >= ResourceSimilarityService.CACHE_DURATION_MS) {
        this.resourceCache.delete(nodeType);
        this.invalidateSuggestions(nodeType);
      }
    }

    if (this.suggestionCache.size > ResourceSimilarityService.SUGGESTION_CACHE_LIMIT) {
      // Maps iterate in insertion order, so the tail holds the newest entries.
      const newest = Array.from(this.suggestionCache.entries())
        .slice(-ResourceSimilarityService.SUGGESTION_CACHE_KEEP);
      this.suggestionCache.clear();
      for (const [key, value] of newest) {
        this.suggestionCache.set(key, value);
      }
    }
  }

  /**
   * Drop every cached suggestion list that was computed for `nodeType`.
   */
  private invalidateSuggestions(nodeType: string): void {
    const prefix = `${nodeType}:`;
    for (const key of this.suggestionCache.keys()) {
      if (key.startsWith(prefix)) {
        this.suggestionCache.delete(key);
      }
    }
  }

  /**
   * Initialize common resource mistake patterns
   */
  private initializeCommonPatterns(): Map<string, ResourcePattern[]> {
    const patterns = new Map<string, ResourcePattern[]>();

    // Google Drive patterns
    patterns.set('googleDrive', [
      { pattern: 'files', suggestion: 'file', confidence: 0.95, reason: 'Use singular "file" not plural' },
      { pattern: 'folders', suggestion: 'folder', confidence: 0.95, reason: 'Use singular "folder" not plural' },
      { pattern: 'permissions', suggestion: 'permission', confidence: 0.9, reason: 'Use singular form' },
      { pattern: 'fileAndFolder', suggestion: 'fileFolder', confidence: 0.9, reason: 'Use "fileFolder" for combined operations' },
      { pattern: 'driveFiles', suggestion: 'file', confidence: 0.8, reason: 'Use "file" for file operations' },
      { pattern: 'sharedDrives', suggestion: 'drive', confidence: 0.85, reason: 'Use "drive" for shared drive operations' },
    ]);

    // Slack patterns
    patterns.set('slack', [
      { pattern: 'messages', suggestion: 'message', confidence: 0.95, reason: 'Use singular "message" not plural' },
      { pattern: 'channels', suggestion: 'channel', confidence: 0.95, reason: 'Use singular "channel" not plural' },
      { pattern: 'users', suggestion: 'user', confidence: 0.95, reason: 'Use singular "user" not plural' },
      { pattern: 'msg', suggestion: 'message', confidence: 0.85, reason: 'Use full "message" not abbreviation' },
      { pattern: 'dm', suggestion: 'message', confidence: 0.7, reason: 'Use "message" for direct messages' },
      { pattern: 'conversation', suggestion: 'channel', confidence: 0.7, reason: 'Use "channel" for conversations' },
    ]);

    // Database patterns (postgres, mysql, mongodb)
    patterns.set('database', [
      { pattern: 'tables', suggestion: 'table', confidence: 0.95, reason: 'Use singular "table" not plural' },
      { pattern: 'queries', suggestion: 'query', confidence: 0.95, reason: 'Use singular "query" not plural' },
      { pattern: 'collections', suggestion: 'collection', confidence: 0.95, reason: 'Use singular "collection" not plural' },
      { pattern: 'documents', suggestion: 'document', confidence: 0.95, reason: 'Use singular "document" not plural' },
      { pattern: 'records', suggestion: 'record', confidence: 0.85, reason: 'Use "record" or "document"' },
      { pattern: 'rows', suggestion: 'row', confidence: 0.9, reason: 'Use singular "row"' },
    ]);

    // Google Sheets patterns
    patterns.set('googleSheets', [
      { pattern: 'sheets', suggestion: 'sheet', confidence: 0.95, reason: 'Use singular "sheet" not plural' },
      { pattern: 'spreadsheets', suggestion: 'spreadsheet', confidence: 0.95, reason: 'Use singular "spreadsheet"' },
      { pattern: 'cells', suggestion: 'cell', confidence: 0.9, reason: 'Use singular "cell"' },
      { pattern: 'ranges', suggestion: 'range', confidence: 0.9, reason: 'Use singular "range"' },
      { pattern: 'worksheets', suggestion: 'sheet', confidence: 0.8, reason: 'Use "sheet" for worksheet operations' },
    ]);

    // Email patterns
    patterns.set('email', [
      { pattern: 'emails', suggestion: 'email', confidence: 0.95, reason: 'Use singular "email" not plural' },
      { pattern: 'messages', suggestion: 'message', confidence: 0.9, reason: 'Use "message" for email operations' },
      { pattern: 'mails', suggestion: 'email', confidence: 0.9, reason: 'Use "email" not "mail"' },
      { pattern: 'attachments', suggestion: 'attachment', confidence: 0.95, reason: 'Use singular "attachment"' },
    ]);

    // Generic plural/singular patterns
    patterns.set('generic', [
      { pattern: 'items', suggestion: 'item', confidence: 0.9, reason: 'Use singular form' },
      { pattern: 'objects', suggestion: 'object', confidence: 0.9, reason: 'Use singular form' },
      { pattern: 'entities', suggestion: 'entity', confidence: 0.9, reason: 'Use singular form' },
      { pattern: 'resources', suggestion: 'resource', confidence: 0.9, reason: 'Use singular form' },
      { pattern: 'elements', suggestion: 'element', confidence: 0.9, reason: 'Use singular form' },
    ]);

    return patterns;
  }

  /**
   * Find similar resources for an invalid resource using pattern matching
   * and Levenshtein distance algorithms
   *
   * @param nodeType - The n8n node type (e.g., 'nodes-base.googleDrive')
   * @param invalidResource - The invalid resource provided by the user
   * @param maxSuggestions - Maximum number of suggestions to return (default: 5);
   *   negative values are treated as 0
   * @returns Array of resource suggestions sorted by confidence (a fresh array
   *   on every call). Empty when the input is blank, when it already matches a
   *   valid resource (case-insensitively), or when nothing is similar enough.
   *
   * @example
   * findSimilarResources('nodes-base.googleDrive', 'files', 3)
   * // Returns: [{ value: 'file', confidence: 0.95, reason: 'Use singular "file" not plural' }]
   */
  findSimilarResources(
    nodeType: string,
    invalidResource: string,
    maxSuggestions: number = ResourceSimilarityService.MAX_SUGGESTIONS
  ): ResourceSuggestion[] {
    // Nothing sensible can be suggested for a missing or blank input.
    if (typeof nodeType !== 'string' || typeof invalidResource !== 'string' || invalidResource.trim() === '') {
      return [];
    }

    // Sweep the caches on a random subset of calls to keep the cost amortised.
    if (Math.random() < ResourceSimilarityService.CLEANUP_PROBABILITY) {
      this.cleanupExpiredEntries();
    }

    // A negative limit would make slice() count from the end of the list.
    const limit = Math.max(0, maxSuggestions);

    // The cache holds the full ranked list, so the limit is applied per call
    // and an earlier call with a smaller limit cannot truncate later results.
    const cacheKey = `${nodeType}:${invalidResource}`;
    const cached = this.suggestionCache.get(cacheKey);
    if (cached) {
      return cached.slice(0, limit);
    }

    const validResources = this.getNodeResources(nodeType);
    const needle = invalidResource.toLowerCase();

    // The input is actually valid (ignoring case): no suggestions needed.
    const isValid = validResources.some(
      resource => this.getResourceValue(resource).toLowerCase() === needle
    );
    if (isValid) {
      return [];
    }

    const suggestions: ResourceSuggestion[] = [];
    const suggestedValues = new Set<string>();
    const addSuggestion = (suggestion: ResourceSuggestion): void => {
      if (!suggestedValues.has(suggestion.value)) {
        suggestedValues.add(suggestion.value);
        suggestions.push(suggestion);
      }
    };

    // 1. Known mistakes, but only when the node really exposes the suggested value.
    for (const known of this.getNodePatterns(nodeType)) {
      if (known.pattern.toLowerCase() !== needle) continue;
      const exists = validResources.some(
        resource => this.getResourceValue(resource) === known.suggestion
      );
      if (exists) {
        addSuggestion({
          value: known.suggestion,
          confidence: known.confidence,
          reason: known.reason
        });
      }
    }

    // 2. Plural/singular variants and 3. general similarity, one pass over the resources.
    const singularForms = this.getSingularCandidates(needle);
    const pluralForm = this.toPlural(needle);

    for (const resource of validResources) {
      const resourceValue = this.getResourceValue(resource);
      // Skip unusable entries and values that a higher-priority rule already covered.
      if (resourceValue === '' || suggestedValues.has(resourceValue)) continue;

      const candidate = resourceValue.toLowerCase();
      const availableOperations = this.getResourceOperations(resource);

      if (singularForms.has(candidate) || candidate === pluralForm) {
        addSuggestion({
          value: resourceValue,
          confidence: ResourceSimilarityService.INFLECTION_CONFIDENCE,
          // Only claim "use singular" when the valid value really is the singular form.
          reason: singularForms.has(candidate)
            ? 'Use singular form for resources'
            : 'Incorrect plural/singular form',
          availableOperations
        });
        continue;
      }

      const similarity = this.calculateSimilarity(invalidResource, resourceValue);
      if (similarity >= ResourceSimilarityService.MIN_CONFIDENCE) {
        addSuggestion({
          value: resourceValue,
          confidence: similarity,
          reason: this.getSimilarityReason(similarity, invalidResource, resourceValue),
          availableOperations
        });
      }
    }

    // Array.prototype.sort is stable, so ties keep their discovery order.
    suggestions.sort((a, b) => b.confidence - a.confidence);

    this.suggestionCache.set(cacheKey, suggestions);
    return suggestions.slice(0, limit);
  }

  /**
   * Type-safe extraction of resource value from various formats
   * @param resource - Resource object or string
   * @returns The resource value as a string; `''` when no usable value is present.
   *   Truthy numeric/boolean values are converted with `String()` so callers can
   *   safely apply string operations to the result.
   */
  private getResourceValue(resource: any): string {
    if (typeof resource === 'string') {
      return resource;
    }
    if (typeof resource !== 'object' || resource === null) {
      return '';
    }

    const raw: unknown = resource.value;
    if (typeof raw === 'string') {
      return raw;
    }
    const isTruthyPrimitive =
      (typeof raw === 'number' || typeof raw === 'boolean' || typeof raw === 'bigint') && Boolean(raw);
    return isTruthyPrimitive ? String(raw) : '';
  }

  /**
   * Operations attached to a cached resource entry, or `undefined` when the
   * entry is not an object (e.g. a bare string or `null`).
   */
  private getResourceOperations(resource: any): string[] | undefined {
    return typeof resource === 'object' && resource !== null ? resource.operations : undefined;
  }

  /**
   * Get resources for a node with caching.
   *
   * Resources are read from the node's `resource` property options; each one is
   * paired with the operation values of every `operation` property whose
   * `displayOptions.show.resource` names it. The two are collected in separate
   * passes so the result does not depend on property order. If the node has no
   * explicit resources, one may be inferred from its operation names.
   *
   * Unknown node types yield `[]` and are not cached. Extraction errors are
   * logged and whatever was collected so far is cached and returned.
   */
  private getNodeResources(nodeType: string): any[] {
    const cached = this.resourceCache.get(nodeType);
    if (cached && Date.now() - cached.timestamp < ResourceSimilarityService.CACHE_DURATION_MS) {
      return cached.resources;
    }

    const nodeInfo = this.repository.getNode(nodeType);
    if (!nodeInfo) return [];

    const resources: any[] = [];

    try {
      const properties: any[] = Array.isArray(nodeInfo.properties) ? nodeInfo.properties : [];
      const operationsByResource = new Map<string, string[]>();

      // Pass 1: declared resources. A node may repeat the `resource` property
      // (e.g. per version), so each value is registered only once.
      for (const prop of properties) {
        if (prop?.name !== 'resource' || !Array.isArray(prop.options)) continue;
        for (const option of prop.options) {
          if (operationsByResource.has(option.value)) continue;
          const operations: string[] = [];
          operationsByResource.set(option.value, operations);
          resources.push({ value: option.value, name: option.name, operations });
        }
      }

      // Pass 2: operations, attached to the resources they are shown for.
      for (const prop of properties) {
        const shownFor = prop?.displayOptions?.show?.resource;
        if (prop?.name !== 'operation' || !shownFor || !Array.isArray(prop.options)) continue;

        const targets: unknown[] = Array.isArray(shownFor) ? shownFor : [shownFor];
        const operationValues: string[] = prop.options.map((op: any) => op.value);

        for (const target of targets) {
          const bucket = operationsByResource.get(target as string);
          if (!bucket) continue;
          for (const operation of operationValues) {
            if (!bucket.includes(operation)) bucket.push(operation);
          }
        }
      }

      // Some nodes don't have explicit resource fields
      if (resources.length === 0) {
        resources.push(...this.extractImplicitResources(properties));
      }
    } catch (error) {
      logger.warn(`Failed to extract resources for ${nodeType}:`, error);
    }

    this.resourceCache.set(nodeType, { resources, timestamp: Date.now() });
    // Suggestions computed from the previous resource list may no longer be valid.
    this.invalidateSuggestions(nodeType);
    return resources;
  }

  /**
   * Extract implicit resources from node properties.
   *
   * For every `operation` property with options, tries to infer a resource
   * from the operation names and, if one is found, emits it with all of that
   * property's operation values.
   */
  private extractImplicitResources(properties: any[]): any[] {
    const resources: any[] = [];

    for (const prop of properties) {
      if (prop?.name !== 'operation' || !Array.isArray(prop.options)) continue;

      const inferred = this.inferResourceFromOperations(prop.options);
      if (inferred) {
        resources.push({
          value: inferred,
          name: inferred.charAt(0).toUpperCase() + inferred.slice(1),
          operations: prop.options.map((op: any) => op.value)
        });
      }
    }

    return resources;
  }

  /**
   * Infer resource type from operations.
   *
   * @param operations - Operation options (objects with a `value`) or bare strings;
   *   entries without a string name are ignored
   * @returns The resource of the first hint whose keyword occurs in any
   *   operation name, or `null` when nothing matches
   */
  private inferResourceFromOperations(operations: any[]): string | null {
    const operationNames: string[] = [];
    for (const op of operations) {
      const raw: unknown = typeof op === 'string' ? op : op?.value;
      if (typeof raw === 'string' && raw !== '') {
        operationNames.push(raw.toLowerCase());
      }
    }

    for (const hint of ResourceSimilarityService.IMPLICIT_RESOURCE_HINTS) {
      const matches = operationNames.some(name => hint.keywords.some(keyword => name.includes(keyword)));
      if (matches) {
        return hint.resource;
      }
    }
    return null;
  }

  /**
   * Get patterns for a specific node type: at most one node-specific group
   * (chosen by substring match on the node type) followed by the generic group.
   */
  private getNodePatterns(nodeType: string): ResourcePattern[] {
    const specific = ResourceSimilarityService.NODE_PATTERN_GROUPS.find(
      ({ markers }) => markers.some(marker => nodeType.includes(marker))
    );

    return [
      ...(specific ? this.commonPatterns.get(specific.group) ?? [] : []),
      // Always add generic patterns
      ...(this.commonPatterns.get('generic') ?? [])
    ];
  }

  /**
   * Convert to singular form (simple heuristic).
   *
   * "-ies" becomes "-y"; "-es" is removed only after a sibilant stem
   * ("boxes" -> "box", "classes" -> "class") and otherwise just the "s" is
   * dropped ("files" -> "file"); a trailing "s" (but not "ss") is dropped.
   */
  private toSingular(word: string): string {
    if (word.endsWith('ies')) {
      return word.slice(0, -3) + 'y';
    }
    if (word.endsWith('es')) {
      const stem = word.slice(0, -2);
      return ResourceSimilarityService.SIBILANT_ENDING.test(stem) ? stem : word.slice(0, -1);
    }
    if (word.endsWith('s') && !word.endsWith('ss')) {
      return word.slice(0, -1);
    }
    return word;
  }

  /**
   * Every plausible singular form of `word`.
   *
   * English plurals are ambiguous without a dictionary ("classes" -> "class"
   * but "databases" -> "database"), and candidates are only ever compared
   * against the node's real resource names, so offering all readings is safe.
   *
   * @returns Non-empty candidate strings, excluding `word` itself
   */
  private getSingularCandidates(word: string): Set<string> {
    const candidates = new Set<string>([this.toSingular(word)]);

    if (word.endsWith('s') && !word.endsWith('ss')) {
      candidates.add(word.slice(0, -1));
    }
    if (word.endsWith('es')) {
      candidates.add(word.slice(0, -2));
    }

    candidates.delete(word);
    candidates.delete('');
    return candidates;
  }

  /**
   * Convert to plural form (simple heuristic)
   */
  private toPlural(word: string): string {
    const endsInConsonantY =
      word.endsWith('y') && !['ay', 'ey', 'iy', 'oy', 'uy'].includes(word.slice(-2));
    if (endsInConsonantY) {
      return word.slice(0, -1) + 'ies';
    }
    if (ResourceSimilarityService.SIBILANT_ENDING.test(word)) {
      return word + 'es';
    }
    return word + 's';
  }

  /**
   * Calculate similarity between two strings using Levenshtein distance.
   *
   * Comparison is case-insensitive. Identical strings score 1.0; an empty
   * string is similar to nothing but another empty string; when one string
   * contains the other the score is at least `MIN_SUBSTRING`; otherwise the
   * score is `1 - distance / maxLength`, with a floor for near-misses on
   * short words.
   *
   * @returns A score in the range [0, 1]
   */
  private calculateSimilarity(str1: string, str2: string): number {
    const s1 = str1.toLowerCase();
    const s2 = str2.toLowerCase();

    if (s1 === s2) return 1.0;

    // Every string "includes" the empty string; without this guard an empty
    // value would receive the substring floor against anything.
    if (s1.length === 0 || s2.length === 0) return 0;

    const maxLength = Math.max(s1.length, s2.length);

    if (s1.includes(s2) || s2.includes(s1)) {
      const ratio = Math.min(s1.length, s2.length) / maxLength;
      return Math.max(ResourceSimilarityService.CONFIDENCE_THRESHOLDS.MIN_SUBSTRING, ratio);
    }

    const distance = this.levenshteinDistance(s1, s2);
    let similarity = 1 - (distance / maxLength);

    // Boost confidence for single character typos and transpositions in short words
    if (maxLength <= 5) {
      if (distance === 1) {
        similarity = Math.max(similarity, 0.75);
      } else if (distance === 2) {
        // A transposition counts as two edits (e.g., "flie" -> "file")
        similarity = Math.max(similarity, 0.72);
      }
    }

    return similarity;
  }

  /**
   * Calculate Levenshtein distance between two strings: the minimum number of
   * single-character insertions, deletions and substitutions that turn `str1`
   * into `str2`. Only two rows of the edit-distance table are kept in memory.
   */
  private levenshteinDistance(str1: string, str2: string): number {
    if (str1 === str2) return 0;
    if (str1.length === 0) return str2.length;
    if (str2.length === 0) return str1.length;

    const width = str2.length + 1;
    // previous[j] = distance between the first i-1 chars of str1 and first j chars of str2.
    let previous: number[] = Array.from({ length: width }, (_, j) => j);
    let current: number[] = new Array<number>(width).fill(0);

    for (let i = 1; i <= str1.length; i++) {
      current[0] = i;
      for (let j = 1; j < width; j++) {
        current[j] = str1[i - 1] === str2[j - 1]
          ? previous[j - 1]
          : 1 + Math.min(
              previous[j],     // deletion
              current[j - 1],  // insertion
              previous[j - 1]  // substitution
            );
      }
      [previous, current] = [current, previous];
    }

    // After the final swap the last computed row lives in `previous`.
    return previous[str2.length];
  }

  /**
   * Generate a human-readable reason for the similarity
   * @param confidence - Similarity confidence score
   * @param invalid - The invalid resource string
   * @param valid - The valid resource string
   * @returns Human-readable explanation of the similarity
   */
  private getSimilarityReason(confidence: number, invalid: string, valid: string): string {
    const { VERY_HIGH, HIGH, MEDIUM } = ResourceSimilarityService.CONFIDENCE_THRESHOLDS;

    if (confidence >= VERY_HIGH) return 'Almost exact match - likely a typo';
    if (confidence >= HIGH) return 'Very similar - common variation';
    if (confidence >= MEDIUM) return 'Similar resource name';
    if (invalid.includes(valid) || valid.includes(invalid)) return 'Partial match';
    return 'Possibly related resource';
  }

  /**
   * Clear caches
   */
  clearCache(): void {
    this.resourceCache.clear();
    this.suggestionCache.clear();
  }
}