// naming.rs (24.1 kB)
//! Naming pattern analysis and recognition
#[cfg(feature = "napi-bindings")]
use napi_derive::napi;
use crate::patterns::types::{Pattern, PatternExample, NamingPattern, PatternExtractor};
use crate::types::{ParseError, LineRange, SemanticConcept};
use std::collections::HashMap;
use walkdir::WalkDir;
use std::fs;
use regex::Regex;
/// Analyzer for detecting and learning naming conventions
///
/// Holds the built-in per-language naming rules together with the patterns
/// observed so far. It is exposed through N-API when the `napi-bindings`
/// feature is enabled.
#[cfg_attr(feature = "napi-bindings", napi)]
pub struct NamingPatternAnalyzer {
    // Observed patterns, keyed by "<rule type>_<context>" (for example
    // "camelCase_function"); the keys are built in `analyze_concepts` and
    // `learn_from_changes`.
    patterns: HashMap<String, NamingPattern>,
    // Built-in rules keyed by language name ("javascript", "typescript",
    // "rust", "python"); populated once by `initialize_rules`.
    naming_rules: HashMap<String, Vec<NamingRule>>,
}
/// A single naming convention: a label, the regular expression that
/// recognizes it, and the base confidence assigned to a match.
#[derive(Debug, Clone)]
struct NamingRule {
    /// Convention label, e.g. `camelCase` or `snake_case`.
    rule_type: String,
    /// Regular expression source; compiled with `Regex::new` where it is used.
    pattern: String,
    /// Base confidence recorded for patterns detected through this rule.
    confidence_weight: f64,
}
#[cfg_attr(feature = "napi-bindings", napi)]
impl NamingPatternAnalyzer {
    #[cfg_attr(feature = "napi-bindings", napi(constructor))]
    pub fn new() -> Self {
        let mut analyzer = NamingPatternAnalyzer {
            patterns: HashMap::new(),
            naming_rules: HashMap::new(),
        };
        analyzer.initialize_rules();
        analyzer
    }
    /// Initialize common naming pattern rules
    ///
    /// Registers one ordered rule list per supported language. Order matters:
    /// callers stop at the first rule whose regex matches a name.
    fn initialize_rules(&mut self) {
        // Small constructor so each rule reads as a single line below.
        let rule = |rule_type: &str, pattern: &str, confidence_weight: f64| NamingRule {
            rule_type: rule_type.to_string(),
            pattern: pattern.to_string(),
            confidence_weight,
        };

        // JavaScript and TypeScript share the same rule set.
        let js_rules = vec![
            rule("camelCase", r"^[a-z][a-zA-Z0-9]*$", 0.9),
            rule("PascalCase", r"^[A-Z][a-zA-Z0-9]*$", 0.9),
            rule("CONSTANT_CASE", r"^[A-Z][A-Z0-9_]*$", 0.8),
        ];
        self.naming_rules.insert("javascript".to_string(), js_rules.clone());
        self.naming_rules.insert("typescript".to_string(), js_rules);

        // Rust
        let rust_rules = vec![
            rule("snake_case", r"^[a-z][a-z0-9_]*$", 0.9),
            rule("PascalCase", r"^[A-Z][a-zA-Z0-9]*$", 0.9),
            rule("SCREAMING_SNAKE_CASE", r"^[A-Z][A-Z0-9_]*$", 0.8),
        ];
        self.naming_rules.insert("rust".to_string(), rust_rules);

        // Python (PascalCase carries a lower weight here than for the other languages)
        let python_rules = vec![
            rule("snake_case", r"^[a-z][a-z0-9_]*$", 0.9),
            rule("PascalCase", r"^[A-Z][a-zA-Z0-9]*$", 0.8),
            rule("CONSTANT_CASE", r"^[A-Z][A-Z0-9_]*$", 0.8),
        ];
        self.naming_rules.insert("python".to_string(), python_rules);
    }
    /// Analyze naming patterns from semantic concepts
    pub fn analyze_concepts(&mut self, concepts: &[SemanticConcept], language: &str) -> Result<Vec<Pattern>, ParseError> {
        let mut detected_patterns: HashMap<String, (u32, Vec<PatternExample>)> = HashMap::new();
        // Get naming rules for the language
        let rules = self.naming_rules.get(language).cloned().unwrap_or_else(|| {
            // Default to common patterns
            vec![
                NamingRule {
                    rule_type: "mixed".to_string(),
                    pattern: r".*".to_string(),
                    confidence_weight: 0.3,
                },
            ]
        });
        // Analyze each concept's name
        for concept in concepts {
            let name = &concept.name;
            
            for rule in &rules {
                if let Ok(regex) = Regex::new(&rule.pattern) {
                    if regex.is_match(name) {
                        let pattern_key = format!("{}_{}", rule.rule_type, self.get_context_type(&concept.concept_type));
                        
                        let example = PatternExample {
                            code: format!("{} {}", concept.concept_type, name),
                            file_path: concept.file_path.clone(),
                            line_range: concept.line_range.clone(),
                        };
                        let entry = detected_patterns.entry(pattern_key.clone()).or_insert((0, Vec::new()));
                        entry.0 += 1;
                        entry.1.push(example);
                        // Update internal naming pattern storage
                        let naming_pattern = NamingPattern {
                            pattern_type: rule.rule_type.clone(),
                            frequency: entry.0,
                            contexts: vec![self.get_context_type(&concept.concept_type)],
                            confidence: rule.confidence_weight,
                        };
                        self.patterns.insert(pattern_key, naming_pattern);
                        break;
                    }
                }
            }
        }
        // Convert to Pattern objects
        let mut patterns = Vec::new();
        for (pattern_key, (frequency, examples)) in detected_patterns {
            if let Some(naming_pattern) = self.patterns.get(&pattern_key) {
                let confidence = self.calculate_confidence(frequency, examples.len(), naming_pattern.confidence);
                
                patterns.push(Pattern {
                    id: format!("naming_{}", pattern_key),
                    pattern_type: "naming".to_string(),
                    description: format!(
                        "{} naming pattern for {} (used {} times)",
                        naming_pattern.pattern_type,
                        naming_pattern.contexts.join(", "),
                        frequency
                    ),
                    frequency,
                    confidence,
                    examples,
                    contexts: vec![language.to_string()],
                });
            }
        }
        Ok(patterns)
    }
    /// Detect violations of established naming patterns
    ///
    /// For each concept, looks up the dominant pattern for its context (as
    /// given by `get_context_type`) and reports the concept when its name
    /// does not match that pattern. Concepts whose context has no dominant
    /// pattern are not reported.
    pub fn detect_violations(&self, concepts: &[SemanticConcept], language: &str) -> Vec<String> {
        let dominant_patterns = self.get_dominant_patterns(language);

        concepts
            .iter()
            .filter_map(|concept| {
                let context = self.get_context_type(&concept.concept_type);
                let expected = dominant_patterns.get(&context)?;
                if self.matches_pattern(&concept.name, &expected.pattern_type) {
                    return None;
                }
                Some(format!(
                    "Naming violation in {}: '{}' should follow {} pattern (found in {}:{})",
                    concept.file_path,
                    concept.name,
                    expected.pattern_type,
                    concept.file_path,
                    concept.line_range.start
                ))
            })
            .collect()
    }
    /// Generate naming recommendations based on learned patterns
    ///
    /// Emits one recommendation per context whose dominant pattern has a
    /// confidence above 0.7, ordered by context name so repeated calls give
    /// the same output. When nothing qualifies, a single generic suggestion
    /// is returned instead.
    pub fn generate_recommendations(&self, language: &str) -> Vec<String> {
        let mut confident: Vec<_> = self
            .get_dominant_patterns(language)
            .into_iter()
            .filter(|(_, pattern)| pattern.confidence > 0.7)
            .collect();
        // The source map has no defined iteration order; sort by context.
        confident.sort_by(|a, b| a.0.cmp(&b.0));

        let mut recommendations: Vec<String> = confident
            .iter()
            .map(|(context, pattern)| {
                format!(
                    "Use {} for {} names (confidence: {:.2})",
                    pattern.pattern_type,
                    context,
                    pattern.confidence
                )
            })
            .collect();

        if recommendations.is_empty() {
            recommendations.push("Consider establishing consistent naming conventions".to_string());
        }

        recommendations
    }
    /// Learn naming patterns from file changes
    ///
    /// Extracts declared names from both versions of the code, keeps the ones
    /// that appear only in `new_code`, classifies each with the rules for
    /// `language`, and bumps the frequency of the matching
    /// `"<rule type>_unknown"` entry in `self.patterns`. One `Pattern` is
    /// returned per newly introduced, classifiable name occurrence.
    ///
    /// This is a simplified, regex-based implementation; it does no AST
    /// diffing. It currently never returns `Err`.
    pub fn learn_from_changes(&mut self, old_code: &str, new_code: &str, language: &str) -> Result<Vec<Pattern>, ParseError> {
        let old_names = self.extract_names_from_code(old_code, language);
        let new_names = self.extract_names_from_code(new_code, language);

        // Set lookup keeps the "is this name new?" check O(1) per name
        // instead of scanning the old list for every new name.
        let previously_known: std::collections::HashSet<&str> =
            old_names.iter().map(String::as_str).collect();

        let mut new_patterns = Vec::new();
        for name in new_names
            .iter()
            .filter(|name| !previously_known.contains(name.as_str()))
        {
            let pattern_type = match self.classify_name(name, language) {
                Some(pattern_type) => pattern_type,
                None => continue,
            };

            // The declaration kind is not known at this level, so learned
            // names are filed under the "unknown" context.
            let pattern_key = format!("{}_{}", pattern_type, "unknown");

            let entry = self.patterns.entry(pattern_key.clone()).or_insert_with(|| NamingPattern {
                pattern_type: pattern_type.clone(),
                frequency: 0,
                contexts: vec!["unknown".to_string()],
                confidence: 0.5,
            });
            entry.frequency += 1;

            new_patterns.push(Pattern {
                id: format!("naming_{}", pattern_key),
                pattern_type: "naming".to_string(),
                description: format!("Detected {} pattern", pattern_type),
                frequency: entry.frequency,
                confidence: entry.confidence,
                examples: vec![],
                contexts: vec![language.to_string()],
            });
        }

        Ok(new_patterns)
    }
    /// Get the dominant patterns for a language
    ///
    /// A stored pattern is considered for `language` when its key contains
    /// the language name or its contexts list includes it. The context is the
    /// text after the last `_` of the key (keys have the form
    /// `"<rule type>_<context>"`); keys without `_` are ignored.
    ///
    /// For each context the pattern with the highest confidence wins. Ties
    /// are broken by higher frequency and then by the lexicographically
    /// smaller pattern type, so the result does not depend on hash-map
    /// iteration order.
    fn get_dominant_patterns(&self, language: &str) -> HashMap<String, &NamingPattern> {
        let mut dominant: HashMap<String, &NamingPattern> = HashMap::new();

        for (key, pattern) in &self.patterns {
            let applies = key.contains(language)
                || pattern.contexts.iter().any(|ctx| ctx.as_str() == language);
            if !applies {
                continue;
            }

            let context = match key.rsplit_once('_') {
                Some((_, context)) => context,
                None => continue,
            };

            let replaces_current = match dominant.get(context) {
                None => true,
                Some(current) => match pattern.confidence.partial_cmp(&current.confidence) {
                    Some(std::cmp::Ordering::Greater) => true,
                    Some(std::cmp::Ordering::Equal) => {
                        pattern.frequency > current.frequency
                            || (pattern.frequency == current.frequency
                                && pattern.pattern_type < current.pattern_type)
                    }
                    // Lower confidence, or not comparable (NaN): keep the current one.
                    _ => false,
                },
            };

            if replaces_current {
                dominant.insert(context.to_string(), pattern);
            }
        }

        dominant
    }
    /// Check if a name matches a pattern type
    fn matches_pattern(&self, name: &str, pattern_type: &str) -> bool {
        match pattern_type {
            "camelCase" => {
                let regex = Regex::new(r"^[a-z][a-zA-Z0-9]*$").unwrap();
                regex.is_match(name)
            }
            "PascalCase" => {
                let regex = Regex::new(r"^[A-Z][a-zA-Z0-9]*$").unwrap();
                regex.is_match(name)
            }
            "snake_case" => {
                let regex = Regex::new(r"^[a-z][a-z0-9_]*$").unwrap();
                regex.is_match(name)
            }
            "CONSTANT_CASE" | "SCREAMING_SNAKE_CASE" => {
                let regex = Regex::new(r"^[A-Z][A-Z0-9_]*$").unwrap();
                regex.is_match(name)
            }
            _ => true, // Unknown patterns are considered matches
        }
    }
    /// Get context type from concept type
    ///
    /// Collapses concept kinds into four naming contexts (`type`, `function`,
    /// `variable`, `constant`); anything else maps to `unknown`.
    fn get_context_type(&self, concept_type: &str) -> String {
        let label = match concept_type {
            "class" | "interface" | "struct" => "type",
            "function" | "method" => "function",
            "variable" | "field" => "variable",
            "constant" => "constant",
            _ => "unknown",
        };
        label.to_string()
    }
    /// Calculate confidence score for a pattern
    ///
    /// Starts from `base_confidence` and adds two capped boosts:
    /// `log10(frequency)` (at most 0.3) and `examples_count / 10` (at most
    /// 0.2). The result is limited to the range `0.0..=1.0`.
    ///
    /// A frequency of 0 is treated like 1 (no boost); taking `log10(0)`
    /// directly would yield negative infinity and poison the score.
    fn calculate_confidence(&self, frequency: u32, examples_count: usize, base_confidence: f64) -> f64 {
        let frequency_boost = f64::from(frequency.max(1)).log10().min(0.3);
        let examples_boost = (examples_count as f64 / 10.0).min(0.2);
        (base_confidence + frequency_boost + examples_boost)
            .min(1.0)
            .max(0.0)
    }
    /// Extract names from code (simplified implementation)
    ///
    /// Runs a small set of declaration regexes for `language` over `code` and
    /// returns the captured identifiers, grouped by regex in the order listed
    /// below and, within one regex, in order of appearance. Duplicates are
    /// kept. Unsupported languages yield an empty list.
    ///
    /// This is a regex scan, not a parser: declarations inside comments or
    /// string literals are picked up as well.
    fn extract_names_from_code(&self, code: &str, language: &str) -> Vec<String> {
        let declaration_patterns: &[&str] = match language {
            "javascript" | "typescript" => &[
                r"function\s+([a-zA-Z_][a-zA-Z0-9_]*)",
                r"const\s+([a-zA-Z_][a-zA-Z0-9_]*)",
                r"let\s+([a-zA-Z_][a-zA-Z0-9_]*)",
                r"var\s+([a-zA-Z_][a-zA-Z0-9_]*)",
                r"class\s+([a-zA-Z_][a-zA-Z0-9_]*)",
            ],
            "rust" => &[
                r"fn\s+([a-zA-Z_][a-zA-Z0-9_]*)",
                r"struct\s+([a-zA-Z_][a-zA-Z0-9_]*)",
                r"enum\s+([a-zA-Z_][a-zA-Z0-9_]*)",
                // Skip an optional `mut` so `let mut count` yields `count`
                // rather than the keyword itself.
                r"let\s+(?:mut\s+)?([a-zA-Z_][a-zA-Z0-9_]*)",
                r"const\s+([A-Z_][A-Z0-9_]*)",
            ],
            "python" => &[
                r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)",
                r"class\s+([a-zA-Z_][a-zA-Z0-9_]*)",
            ],
            _ => &[], // Add more languages as needed
        };

        let mut names = Vec::new();
        for pattern in declaration_patterns {
            // A pattern that fails to compile is skipped rather than aborting the scan.
            let regex = match Regex::new(pattern) {
                Ok(regex) => regex,
                Err(_) => continue,
            };
            names.extend(
                regex
                    .captures_iter(code)
                    .filter_map(|captures| captures.get(1).map(|name| name.as_str().to_string())),
            );
        }

        names
    }
    /// Classify a name into a pattern type
    ///
    /// Returns the rule type of the first rule registered for `language`
    /// whose regex matches `name`. Returns `None` when the language has no
    /// rules or no rule matches; rules whose regex fails to compile are
    /// skipped.
    fn classify_name(&self, name: &str, language: &str) -> Option<String> {
        self.naming_rules
            .get(language)?
            .iter()
            .find(|rule| {
                Regex::new(&rule.pattern)
                    .map(|regex| regex.is_match(name))
                    .unwrap_or(false)
            })
            .map(|rule| rule.rule_type.clone())
    }
}
impl PatternExtractor for NamingPatternAnalyzer {
    /// Walk `path` recursively and emit one naming `Pattern` per classifiable
    /// name found in supported source files (`js`/`jsx`, `ts`/`tsx`, `rs`,
    /// `py`, matched case-insensitively).
    ///
    /// The scan is best-effort: directory entries that cannot be read, files
    /// without a usable extension, and files that cannot be read as text are
    /// skipped silently. Every example carries a placeholder line range of
    /// 1..1 because name extraction does not track positions.
    fn extract_patterns(&self, path: &str) -> Result<Vec<Pattern>, ParseError> {
        let mut collected = Vec::new();

        let files = WalkDir::new(path)
            .into_iter()
            .filter_map(|entry| entry.ok())
            .filter(|entry| entry.file_type().is_file());

        for entry in files {
            let file_path = entry.path();

            let extension = match file_path.extension().and_then(|ext| ext.to_str()) {
                Some(ext) => ext.to_lowercase(),
                None => continue,
            };
            let language = match extension.as_str() {
                "js" | "jsx" => "javascript",
                "ts" | "tsx" => "typescript",
                "rs" => "rust",
                "py" => "python",
                _ => continue,
            };

            let content = match fs::read_to_string(file_path) {
                Ok(text) => text,
                Err(_) => continue,
            };

            for name in self.extract_names_from_code(&content, language) {
                let pattern_type = match self.classify_name(&name, language) {
                    Some(pattern_type) => pattern_type,
                    None => continue,
                };

                collected.push(Pattern {
                    id: format!("naming_{}_{}", pattern_type, name),
                    pattern_type: "naming".to_string(),
                    description: format!("{} naming pattern", pattern_type),
                    frequency: 1,
                    confidence: 0.7,
                    examples: vec![PatternExample {
                        code: name,
                        file_path: file_path.to_string_lossy().to_string(),
                        line_range: LineRange { start: 1, end: 1 },
                    }],
                    contexts: vec![language.to_string()],
                });
            }
        }

        Ok(collected)
    }
}
/// `Default` delegates to [`NamingPatternAnalyzer::new`], so a default
/// analyzer has the built-in naming rules loaded.
impl Default for NamingPatternAnalyzer {
    fn default() -> Self {
        Self::new()
    }
}
// Unit tests for the naming analyzer: rule-based detection, violation
// reporting, recommendations, and the private helper methods.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::SemanticConcept;
    use std::collections::HashMap;
    /// Build a minimal `SemanticConcept` fixture; only the name, concept type
    /// and file path vary between tests.
    fn create_test_concept(name: &str, concept_type: &str, file_path: &str) -> SemanticConcept {
        SemanticConcept {
            id: format!("test_{}", name),
            name: name.to_string(),
            concept_type: concept_type.to_string(),
            confidence: 0.8,
            file_path: file_path.to_string(),
            line_range: LineRange { start: 1, end: 1 },
            relationships: HashMap::new(),
            metadata: HashMap::new(),
        }
    }
    // A fresh analyzer has no learned patterns, so this assertion holds
    // because the built-in naming rules are loaded by `new`.
    #[test]
    fn test_naming_pattern_analyzer_creation() {
        let analyzer = NamingPatternAnalyzer::new();
        assert!(!analyzer.patterns.is_empty() || !analyzer.naming_rules.is_empty());
    }
    // camelCase names analyzed as JavaScript are reported as camelCase patterns.
    #[test]
    fn test_camel_case_detection() {
        let mut analyzer = NamingPatternAnalyzer::new();
        let concepts = vec![
            create_test_concept("getUserName", "function", "test.js"),
            create_test_concept("userName", "variable", "test.js"),
        ];
        let patterns = analyzer.analyze_concepts(&concepts, "javascript").unwrap();
        assert!(!patterns.is_empty());

        let camel_case_patterns: Vec<_> = patterns.iter()
            .filter(|p| p.description.contains("camelCase"))
            .collect();
        assert!(!camel_case_patterns.is_empty());
    }
    // snake_case names analyzed as Rust are reported as snake_case patterns.
    #[test]
    fn test_snake_case_detection() {
        let mut analyzer = NamingPatternAnalyzer::new();
        let concepts = vec![
            create_test_concept("get_user_name", "function", "test.rs"),
            create_test_concept("user_name", "variable", "test.rs"),
        ];
        let patterns = analyzer.analyze_concepts(&concepts, "rust").unwrap();
        assert!(!patterns.is_empty());

        let snake_case_patterns: Vec<_> = patterns.iter()
            .filter(|p| p.description.contains("snake_case"))
            .collect();
        assert!(!snake_case_patterns.is_empty());
    }
    // PascalCase class names analyzed as TypeScript are reported as PascalCase patterns.
    #[test]
    fn test_pascal_case_detection() {
        let mut analyzer = NamingPatternAnalyzer::new();
        let concepts = vec![
            create_test_concept("UserService", "class", "test.ts"),
            create_test_concept("ApiClient", "class", "test.ts"),
        ];
        let patterns = analyzer.analyze_concepts(&concepts, "typescript").unwrap();
        assert!(!patterns.is_empty());

        let pascal_case_patterns: Vec<_> = patterns.iter()
            .filter(|p| p.description.contains("PascalCase"))
            .collect();
        assert!(!pascal_case_patterns.is_empty());
    }
    // NOTE(review): this test only passes if the patterns stored by
    // `analyze_concepts` can be associated with "javascript" by
    // `get_dominant_patterns` (via the key or the stored contexts).
    #[test]
    fn test_violation_detection() {
        let mut analyzer = NamingPatternAnalyzer::new();

        // First establish a pattern
        let good_concepts = vec![
            create_test_concept("getUserName", "function", "test.js"),
            create_test_concept("setUserName", "function", "test.js"),
            create_test_concept("userName", "variable", "test.js"),
        ];
        let _ = analyzer.analyze_concepts(&good_concepts, "javascript").unwrap();
        // Then check for violations
        let bad_concepts = vec![
            create_test_concept("get_user_age", "function", "test.js"), // snake_case in JS
        ];

        let violations = analyzer.detect_violations(&bad_concepts, "javascript");
        assert!(!violations.is_empty());
    }
    // `generate_recommendations` always returns at least one entry: it falls
    // back to a generic suggestion when no dominant pattern qualifies.
    #[test]
    fn test_recommendations_generation() {
        let mut analyzer = NamingPatternAnalyzer::new();
        let concepts = vec![
            create_test_concept("getUserName", "function", "test.js"),
            create_test_concept("setUserName", "function", "test.js"),
            create_test_concept("userName", "variable", "test.js"),
        ];
        let _ = analyzer.analyze_concepts(&concepts, "javascript").unwrap();
        let recommendations = analyzer.generate_recommendations("javascript");
        assert!(!recommendations.is_empty());
    }
    // Concept kinds collapse into the naming contexts; unrecognized kinds map to "unknown".
    #[test]
    fn test_context_type_mapping() {
        let analyzer = NamingPatternAnalyzer::new();

        assert_eq!(analyzer.get_context_type("class"), "type");
        assert_eq!(analyzer.get_context_type("function"), "function");
        assert_eq!(analyzer.get_context_type("variable"), "variable");
        assert_eq!(analyzer.get_context_type("constant"), "constant");
        assert_eq!(analyzer.get_context_type("unknown"), "unknown");
    }
    // Each convention's own name is a valid example of that convention;
    // mismatched pairs must be rejected.
    #[test]
    fn test_pattern_matching() {
        let analyzer = NamingPatternAnalyzer::new();

        assert!(analyzer.matches_pattern("camelCase", "camelCase"));
        assert!(analyzer.matches_pattern("PascalCase", "PascalCase"));
        assert!(analyzer.matches_pattern("snake_case", "snake_case"));
        assert!(analyzer.matches_pattern("CONSTANT_CASE", "CONSTANT_CASE"));

        assert!(!analyzer.matches_pattern("PascalCase", "camelCase"));
        assert!(!analyzer.matches_pattern("snake_case", "PascalCase"));
    }
    // Regex-based extraction finds function and variable names in JS and Rust snippets.
    #[test]
    fn test_name_extraction() {
        let analyzer = NamingPatternAnalyzer::new();

        let js_code = "function getUserName() { const userName = 'test'; }";
        let names = analyzer.extract_names_from_code(js_code, "javascript");
        assert!(names.contains(&"getUserName".to_string()));
        assert!(names.contains(&"userName".to_string()));

        let rust_code = "fn get_user_name() { let user_name = String::new(); }";
        let names = analyzer.extract_names_from_code(rust_code, "rust");
        assert!(names.contains(&"get_user_name".to_string()));
        assert!(names.contains(&"user_name".to_string()));
    }
    // Classification returns the first matching rule for the language; the
    // Rust rule set labels upper-case-with-underscores as SCREAMING_SNAKE_CASE.
    #[test]
    fn test_name_classification() {
        let analyzer = NamingPatternAnalyzer::new();

        assert_eq!(analyzer.classify_name("camelCase", "javascript"), Some("camelCase".to_string()));
        assert_eq!(analyzer.classify_name("PascalCase", "javascript"), Some("PascalCase".to_string()));
        assert_eq!(analyzer.classify_name("snake_case", "rust"), Some("snake_case".to_string()));
        assert_eq!(analyzer.classify_name("CONSTANT_CASE", "rust"), Some("SCREAMING_SNAKE_CASE".to_string()));
    }
}