// fallback.rs
//! Fallback pattern-based extraction when tree-sitter parsing fails
use crate::types::{SemanticConcept, LineRange};
use std::collections::HashMap;
use std::path::Path;
/// Heuristic, line-oriented extractor used when tree-sitter parsing fails.
///
/// No parser and no regular expressions are involved: every line is scanned
/// on its own with plain string matching. Results are therefore best-effort
/// and every produced concept carries a reduced confidence plus metadata
/// marking it as fallback output.
#[derive(Debug, Clone, Copy)]
pub struct FallbackExtractor;

impl FallbackExtractor {
    /// Words that can be followed by `(...) {` without declaring a method.
    const NON_METHOD_WORDS: &'static [&'static str] = &[
        "if", "for", "while", "switch", "catch", "with", "return", "function", "match", "loop",
        "else", "do", "try", "new", "await", "typeof", "fn", "def", "yield", "throw",
    ];

    /// Modifiers that may precede a method name inside a class body.
    const METHOD_MODIFIERS: &'static [&'static str] = &[
        "public", "private", "protected", "static", "async", "override", "abstract", "readonly",
        "get", "set",
    ];

    /// Create a new fallback extractor.
    pub fn new() -> Self {
        Self
    }

    /// Extract concepts from `content` using per-line string heuristics.
    ///
    /// Each non-comment line may contribute at most one function, one class
    /// (Rust `struct`s are reported as classes) and one interface, in that
    /// order. Line numbers are 1-based. Concept ids are
    /// `fallback_<fn|class|interface>_<n>` where `n` is a counter shared by
    /// all kinds.
    ///
    /// When nothing is recognised, a single `"file"` concept named after the
    /// file stem of `file_path` is returned, so the result is never empty.
    pub fn extract_concepts(&self, file_path: &str, content: &str) -> Vec<SemanticConcept> {
        let mut concepts = Vec::new();

        for (index, raw_line) in content.lines().enumerate() {
            let line = raw_line.trim();
            // Comment lines routinely mention "class"/"function" in prose.
            if line.is_empty() || Self::is_comment_line(line) {
                continue;
            }

            let candidates = [
                ("fn", "function", self.extract_function_name(line)),
                ("class", "class", self.extract_class_name(line)),
                ("interface", "interface", self.extract_interface_name(line)),
            ];
            for (id_tag, concept_type, found) in candidates {
                if let Some(name) = found {
                    // The numeric suffix is the 1-based position of the concept.
                    let id = format!("fallback_{}_{}", id_tag, concepts.len() + 1);
                    concepts.push(self.create_fallback_concept(
                        &id,
                        name,
                        concept_type,
                        file_path,
                        index + 1,
                    ));
                }
            }
        }

        // If no concepts were found, describe the file itself.
        if concepts.is_empty() {
            let file_name = Path::new(file_path)
                .file_stem()
                .and_then(|stem| stem.to_str())
                .unwrap_or("unknown");
            concepts.push(self.create_fallback_concept(
                "fallback_file_1",
                file_name.to_string(),
                "file",
                file_path,
                1,
            ));
        }

        concepts
    }

    /// Build a single-line concept tagged as fallback output.
    ///
    /// `line` is the 1-based line number; it is used for both ends of the
    /// line range and saturates at `u32::MAX` instead of wrapping.
    fn create_fallback_concept(
        &self,
        id: &str,
        name: String,
        concept_type: &str,
        file_path: &str,
        line: usize,
    ) -> SemanticConcept {
        let mut relationships = HashMap::new();
        relationships.insert("extraction_method".to_string(), "fallback".to_string());

        let mut metadata = HashMap::new();
        metadata.insert("source".to_string(), "regex_fallback".to_string());
        metadata.insert(
            "confidence_reason".to_string(),
            "tree_sitter_failed".to_string(),
        );

        // Saturate rather than silently truncate on absurdly long inputs.
        let line = line.min(u32::MAX as usize) as u32;

        SemanticConcept {
            id: id.to_string(),
            name,
            concept_type: concept_type.to_string(),
            confidence: 0.7, // Lower confidence for fallback extraction
            file_path: file_path.to_string(),
            line_range: LineRange {
                start: line,
                end: line,
            },
            relationships,
            metadata,
        }
    }

    /// Find the name of a function declared on `line`, if any.
    ///
    /// Patterns are tried in this order and the first hit wins:
    /// `function name(`, `name = (...) =>`, `fn name(`, `def name(`, and
    /// finally method shorthand `name(...) {`. Generic parameters between
    /// the name and the parameter list are accepted.
    fn extract_function_name(&self, line: &str) -> Option<String> {
        Self::keyword_callable(line, "function")
            .or_else(|| Self::arrow_function_name(line))
            .or_else(|| Self::keyword_callable(line, "fn"))
            .or_else(|| Self::keyword_callable(line, "def"))
            .or_else(|| Self::method_shorthand_name(line))
            .map(str::to_owned)
    }

    /// Find the name of a `class` or (Rust) `struct` declared on `line`.
    fn extract_class_name(&self, line: &str) -> Option<String> {
        Self::declared_type_name(line, "class")
            .or_else(|| Self::declared_type_name(line, "struct"))
            .map(str::to_owned)
    }

    /// Find the name of an `interface` declared on `line`.
    fn extract_interface_name(&self, line: &str) -> Option<String> {
        Self::declared_type_name(line, "interface").map(str::to_owned)
    }

    /// Check if a string is a valid programming language identifier:
    /// non-empty, starting with a letter or `_`, and containing only
    /// alphanumerics and `_`.
    fn is_valid_identifier(&self, name: &str) -> bool {
        Self::is_identifier(name)
    }

    /// Receiver-free form of [`Self::is_valid_identifier`] for the helpers.
    fn is_identifier(name: &str) -> bool {
        let mut chars = name.chars();
        match chars.next() {
            Some(first) if first.is_alphabetic() || first == '_' => {
                chars.all(Self::is_identifier_char)
            }
            _ => false,
        }
    }

    /// Whether `c` may appear inside an identifier.
    fn is_identifier_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    /// Whether the (already trimmed) line is a comment or attribute line.
    fn is_comment_line(line: &str) -> bool {
        ["//", "/*", "*", "#"]
            .iter()
            .any(|&marker| line.starts_with(marker))
    }

    /// Split `text` into its leading identifier run (possibly empty) and the rest.
    fn split_leading_identifier(text: &str) -> (&str, &str) {
        let end = text
            .find(|c: char| !Self::is_identifier_char(c))
            .unwrap_or(text.len());
        text.split_at(end)
    }

    /// Yield the text after every whole-word occurrence of `keyword` that is
    /// followed by whitespace, with that whitespace removed.
    ///
    /// Trying every occurrence (not just the first) lets a real declaration
    /// be found even when the keyword also appears earlier in a string.
    fn after_keyword<'a>(
        line: &'a str,
        keyword: &'static str,
    ) -> impl Iterator<Item = &'a str> + 'a {
        line.match_indices(keyword).filter_map(move |(pos, matched)| {
            let starts_word = !line[..pos]
                .chars()
                .next_back()
                .map_or(false, Self::is_identifier_char);
            let rest = &line[pos + matched.len()..];
            let ends_word = rest.chars().next().map_or(false, char::is_whitespace);
            if starts_word && ends_word {
                Some(rest.trim_start())
            } else {
                None
            }
        })
    }

    /// Return the identifier at the start of `text` when a parameter list
    /// follows it, optionally after generic parameters.
    fn callable_name(text: &str) -> Option<&str> {
        let (name, rest) = Self::split_leading_identifier(text);
        let rest = rest.trim_start();
        let has_params = rest.starts_with('(') || (rest.starts_with('<') && rest.contains('('));
        if has_params {
            Some(name)
        } else {
            None
        }
    }

    /// Name of the first valid `<keyword> name(` declaration on the line.
    fn keyword_callable<'a>(line: &'a str, keyword: &'static str) -> Option<&'a str> {
        Self::after_keyword(line, keyword)
            .filter_map(Self::callable_name)
            .find(|name| Self::is_identifier(name))
    }

    /// Name of the first valid `<keyword> Name` type declaration on the line.
    ///
    /// The name is the identifier run right after the keyword, so any
    /// delimiter (`{`, `<`, `(`, `:`, `;`, whitespace) terminates it.
    fn declared_type_name<'a>(line: &'a str, keyword: &'static str) -> Option<&'a str> {
        Self::after_keyword(line, keyword)
            .map(|rest| Self::split_leading_identifier(rest).0)
            .find(|name| Self::is_identifier(name))
    }

    /// Given text starting with `(`, return what follows the matching `)`.
    /// Returns `None` when the parentheses do not balance within `text`.
    fn skip_balanced_parens(text: &str) -> Option<&str> {
        let mut depth = 0usize;
        for (pos, ch) in text.char_indices() {
            match ch {
                '(' => depth += 1,
                ')' => {
                    depth = depth.checked_sub(1)?;
                    if depth == 0 {
                        return Some(&text[pos + 1..]);
                    }
                }
                _ => {}
            }
        }
        None
    }

    /// Name bound to an arrow function: `name = (...) =>` or `name = x =>`.
    ///
    /// The first `=` must be a plain assignment that precedes the first
    /// `=>`, and the text between them must be a parameter list (optionally
    /// `async`, optionally with a `: Type` return annotation) or a single
    /// bare parameter. This keeps `const n = xs.map(x => x)` from being
    /// reported as a function named `n`.
    fn arrow_function_name(line: &str) -> Option<&str> {
        let arrow = line.find("=>")?;
        let assign = line.find('=')?;
        if assign >= arrow {
            // The first `=` is the arrow itself: nothing is being assigned.
            return None;
        }

        let target = &line[..assign];
        let is_comparison = line[assign + 1..].starts_with('=')
            || target.ends_with('!')
            || target.ends_with('<')
            || target.ends_with('>');
        if is_comparison {
            return None;
        }

        let head = line[assign + 1..arrow].trim();
        let head = head.strip_prefix("async").map_or(head, str::trim_start);
        let is_arrow_head = if head.starts_with('(') {
            Self::skip_balanced_parens(head).map_or(false, |after| {
                let after = after.trim_start();
                after.is_empty() || after.starts_with(':')
            })
        } else {
            Self::is_identifier(head)
        };
        if !is_arrow_head {
            return None;
        }

        // Drop a `: Type` annotation, then take the last word of the binding.
        target
            .split(':')
            .next()
            .and_then(|binding| binding.split_whitespace().next_back())
            .filter(|name| Self::is_identifier(name))
    }

    /// Name of a method written in shorthand form, `name(...) {`.
    ///
    /// A method may start the line or directly follow a `{`, which covers
    /// one-line class bodies.
    fn method_shorthand_name(line: &str) -> Option<&str> {
        Self::method_head(line).or_else(|| {
            line.match_indices('{')
                .find_map(|(pos, _)| Self::method_head(&line[pos + 1..]))
        })
    }

    /// Parse `[modifier ]* name(params) [: Type] {` at the start of `segment`.
    fn method_head(segment: &str) -> Option<&str> {
        let mut rest = segment.trim_start();
        loop {
            let (word, tail) = Self::split_leading_identifier(rest);
            let is_excluded = Self::NON_METHOD_WORDS.iter().any(|&w| w == word);
            if !Self::is_identifier(word) || is_excluded {
                return None;
            }

            let after_word = tail.trim_start();
            if after_word.starts_with('(') {
                let after_params = Self::skip_balanced_parens(after_word)?.trim_start();
                let opens_body = if after_params.starts_with('{') {
                    true
                } else if let Some(annotation) = after_params.strip_prefix(':') {
                    // A return type must reach a `{` before the statement ends;
                    // `name(): T;` is a signature, not a definition.
                    annotation
                        .find('{')
                        .map_or(false, |brace| !annotation[..brace].contains(';'))
                } else {
                    false
                };
                return if opens_body { Some(word) } else { None };
            }

            // Otherwise the word must be a modifier separated by whitespace.
            let is_modifier = Self::METHOD_MODIFIERS.iter().any(|&m| m == word);
            if !is_modifier || after_word.len() == tail.len() {
                return None;
            }
            rest = after_word;
        }
    }
}
impl Default for FallbackExtractor {
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain JavaScript function declaration yields one function concept
    /// with the reduced fallback confidence.
    #[test]
    fn test_extract_javascript_function() {
        let extractor = FallbackExtractor::new();

        let code = "function calculateTotal() { return 42; }";
        let concepts = extractor.extract_concepts("test.js", code);

        assert_eq!(concepts.len(), 1);
        assert_eq!(concepts[0].name, "calculateTotal");
        assert_eq!(concepts[0].concept_type, "function");
        assert_eq!(concepts[0].confidence, 0.7);
    }

    /// An arrow function bound with `const` is reported under the bound name.
    #[test]
    fn test_extract_arrow_function() {
        let extractor = FallbackExtractor::new();

        let code = "const handleClick = () => { console.log('clicked'); }";
        let concepts = extractor.extract_concepts("test.js", code);

        assert_eq!(concepts.len(), 1);
        assert_eq!(concepts[0].name, "handleClick");
        assert_eq!(concepts[0].concept_type, "function");
    }

    /// A one-line TypeScript class yields both the class and its method.
    #[test]
    fn test_extract_typescript_class() {
        let extractor = FallbackExtractor::new();

        let code = "export class UserService { getName() { return 'test'; } }";
        let concepts = extractor.extract_concepts("test.ts", code);

        assert_eq!(concepts.len(), 2); // Class + method

        let class_concept = concepts.iter().find(|c| c.concept_type == "class").unwrap();
        assert_eq!(class_concept.name, "UserService");

        let function_concept = concepts.iter().find(|c| c.concept_type == "function").unwrap();
        assert_eq!(function_concept.name, "getName");
    }

    /// A Rust `fn` item yields one function concept.
    #[test]
    fn test_extract_rust_function() {
        let extractor = FallbackExtractor::new();

        let code = "fn calculate_total() -> i32 { 42 }";
        let concepts = extractor.extract_concepts("test.rs", code);

        assert_eq!(concepts.len(), 1);
        assert_eq!(concepts[0].name, "calculate_total");
        assert_eq!(concepts[0].concept_type, "function");
    }

    /// A Rust `struct` is reported with the `"class"` concept type.
    #[test]
    fn test_extract_rust_struct() {
        let extractor = FallbackExtractor::new();

        let code = "pub struct User { name: String }";
        let concepts = extractor.extract_concepts("test.rs", code);

        assert_eq!(concepts.len(), 1);
        assert_eq!(concepts[0].name, "User");
        assert_eq!(concepts[0].concept_type, "class"); // Mapped as class
    }

    /// A Python `def` yields one function concept; the body line adds nothing.
    #[test]
    fn test_extract_python_function() {
        let extractor = FallbackExtractor::new();

        let code = "def process_data(data):\n    return data.strip()";
        let concepts = extractor.extract_concepts("test.py", code);

        assert_eq!(concepts.len(), 1);
        assert_eq!(concepts[0].name, "process_data");
        assert_eq!(concepts[0].concept_type, "function");
    }

    /// An interface is reported once; its method signature is not a function.
    #[test]
    fn test_extract_interface() {
        let extractor = FallbackExtractor::new();

        let code = "interface IUserService { getName(): string; }";
        let concepts = extractor.extract_concepts("test.ts", code);

        assert_eq!(concepts.len(), 1);
        assert_eq!(concepts[0].name, "IUserService");
        assert_eq!(concepts[0].concept_type, "interface");
    }

    /// A mixed snippet yields classes, functions and interfaces together.
    #[test]
    fn test_extract_multiple_concepts() {
        let extractor = FallbackExtractor::new();

        let code = r#"
            class Calculator {
                add(a, b) { return a + b; }
            }

            function multiply(x, y) {
                return x * y;
            }

            interface MathOperations {
                calculate(): number;
            }
        "#;

        let concepts = extractor.extract_concepts("test.ts", code);

        assert!(concepts.len() >= 3);

        let class_concepts = concepts.iter().filter(|c| c.concept_type == "class").count();
        let function_concepts = concepts.iter().filter(|c| c.concept_type == "function").count();
        let interface_concepts = concepts.iter().filter(|c| c.concept_type == "interface").count();

        assert!(class_concepts >= 1);
        assert!(function_concepts >= 2); // add + multiply
        assert!(interface_concepts >= 1);
    }

    /// Empty input falls back to a single file concept named after the stem.
    #[test]
    fn test_empty_content() {
        let extractor = FallbackExtractor::new();

        let concepts = extractor.extract_concepts("empty.js", "");

        assert_eq!(concepts.len(), 1);
        assert_eq!(concepts[0].name, "empty");
        assert_eq!(concepts[0].concept_type, "file");
    }

    /// Code without any declaration also falls back to the file concept.
    #[test]
    fn test_no_concepts_found() {
        let extractor = FallbackExtractor::new();

        let code = "const x = 42;\nconsole.log('hello');";
        let concepts = extractor.extract_concepts("simple.js", code);

        assert_eq!(concepts.len(), 1);
        assert_eq!(concepts[0].name, "simple");
        assert_eq!(concepts[0].concept_type, "file");
    }

    /// Identifier validation accepts letters/underscore starts only.
    #[test]
    fn test_invalid_identifiers() {
        let extractor = FallbackExtractor::new();

        // Should not extract invalid identifiers
        assert!(!extractor.is_valid_identifier(""));
        assert!(!extractor.is_valid_identifier("123abc")); // Starts with number
        assert!(!extractor.is_valid_identifier("hello-world")); // Contains dash
        assert!(!extractor.is_valid_identifier("hello.world")); // Contains dot

        // Should extract valid identifiers
        assert!(extractor.is_valid_identifier("hello"));
        assert!(extractor.is_valid_identifier("_private"));
        assert!(extractor.is_valid_identifier("camelCase"));
        assert!(extractor.is_valid_identifier("snake_case"));
        assert!(extractor.is_valid_identifier("PascalCase"));
        assert!(extractor.is_valid_identifier("a123"));
    }

    /// Every concept carries the fallback relationship and metadata markers.
    #[test]
    fn test_concept_metadata() {
        let extractor = FallbackExtractor::new();

        let code = "function test() { return 42; }";
        let concepts = extractor.extract_concepts("test.js", code);

        assert_eq!(concepts.len(), 1);

        let concept = &concepts[0];
        assert_eq!(concept.relationships.get("extraction_method"), Some(&"fallback".to_string()));
        assert_eq!(concept.metadata.get("source"), Some(&"regex_fallback".to_string()));
        assert_eq!(concept.metadata.get("confidence_reason"), Some(&"tree_sitter_failed".to_string()));
    }

    /// Reported line numbers are 1-based and count blank lines.
    #[test]
    fn test_line_numbers() {
        let extractor = FallbackExtractor::new();

        // Explicit `\n` escapes keep the layout unambiguous: the definitions
        // sit on lines 2, 4 and 6, separated by blank lines.
        let code = "\nfunction first() {}\n\nclass Second {}\n\nfunction third() {}\n";

        let concepts = extractor.extract_concepts("test.js", code);

        // Find concepts by name and check their line numbers
        let first_fn = concepts.iter().find(|c| c.name == "first").unwrap();
        assert_eq!(first_fn.line_range.start, 2); // Second line (1-indexed)

        let second_class = concepts.iter().find(|c| c.name == "Second").unwrap();
        assert_eq!(second_class.line_range.start, 4); // Fourth line

        let third_fn = concepts.iter().find(|c| c.name == "third").unwrap();
        assert_eq!(third_fn.line_range.start, 6); // Sixth line
    }

    /// Keywords inside comments or strings must not produce concepts.
    #[test]
    fn test_edge_case_patterns() {
        let extractor = FallbackExtractor::new();

        // Edge case: function keyword in comment should not be extracted
        let code_with_comment = "// This function does something\nfunction realFunction() {}";
        let concepts = extractor.extract_concepts("test.js", code_with_comment);

        assert_eq!(concepts.len(), 1);
        assert_eq!(concepts[0].name, "realFunction");

        // Edge case: function keyword in string should not be extracted
        let code_with_string = r#"const msg = "function in string"; function actualFunction() {}"#;
        let string_concepts = extractor.extract_concepts("test.js", code_with_string);

        assert_eq!(string_concepts.len(), 1);
        assert_eq!(string_concepts[0].name, "actualFunction");
    }
}