// file-analyzer.js • 16.1 kB
/**
* File Analyzer Module
* Handles all file pattern analysis and similarity calculations
*/
import fs from 'fs-extra';
import path from 'path';
import { glob } from 'glob';
/**
 * Locates test-automation source files in a repository, extracts regex-based
 * structural patterns from them, and scores how similar two extracted
 * pattern sets are.
 *
 * The injected `config` object must provide the methods this class calls:
 * `get(key)`, `getSkipDirectories()`, `getMaxFilesToAnalyze()`,
 * `shouldEnableParallelProcessing()` and `getMaxConcurrentFiles()`.
 */
export class FileAnalyzer {
  /**
   * @param {object} config - Configuration accessor (see class description).
   */
  constructor(config) {
    this.config = config;
    // Keyed by `${repo_path}-${patternType}`; values are `{ data, timestamp }`.
    this.cache = new Map();
  }

  /**
   * Finds files below `repo_path` that belong to the given pattern category.
   *
   * Results are de-duplicated first and only then truncated to the configured
   * maximum, so overlapping glob patterns do not consume the file budget.
   * A non-numeric or negative maximum is treated as "no limit".
   *
   * @param {string} repo_path - Directory used as the glob working directory.
   * @param {string} patternType - One of `features`, `steps`, `pages`,
   *   `components`, `utils`; anything else falls back to `all`.
   * @returns {Promise<string[]>} Absolute file paths; `[]` if globbing fails.
   */
  async findFilesByPattern(repo_path, patternType) {
    const cachingEnabled = this.config.get('performance.enableCaching');
    const cacheKey = `${repo_path}-${patternType}`;

    if (cachingEnabled && this.cache.has(cacheKey)) {
      const cached = this.cache.get(cacheKey);
      if (Date.now() - cached.timestamp < this.config.get('patternRecognition.maxCacheAge')) {
        // Hand out a copy so callers cannot mutate the cached list.
        return [...cached.data];
      }
    }

    const patterns = {
      features: ['**/features/**/*.feature', '**/feature/**/*.feature', '**/*.feature'],
      steps: ['**/steps/**/*.{js,ts}', '**/step_definitions/**/*.{js,ts}', '**/step-definitions/**/*.{js,ts}'],
      pages: ['**/pages/**/*.{js,ts}', '**/page_objects/**/*.{js,ts}', '**/pageobjects/**/*.{js,ts}'],
      components: ['**/components/**/*.{js,ts}', '**/data/**/*.{js,ts}'],
      utils: ['**/utils/**/*.{js,ts}', '**/helpers/**/*.{js,ts}'],
      all: ['**/*.{js,ts,feature}']
    };

    const skipDirs = this.config.getSkipDirectories();

    try {
      // Own-property check keeps keys such as "constructor" from resolving
      // to inherited Object.prototype members.
      const searchPatterns = Object.prototype.hasOwnProperty.call(patterns, patternType)
        ? patterns[patternType]
        : patterns.all;

      const configuredMax = this.config.getMaxFilesToAnalyze();
      const limit = Number.isFinite(configuredMax) && configuredMax >= 0
        ? Math.floor(configuredMax)
        : Infinity;

      const ignore = skipDirs.map(dir => `**/${dir}/**`);
      const found = new Set();

      for (const pattern of searchPatterns) {
        if (found.size >= limit) {
          break;
        }
        const matches = await glob(pattern, {
          cwd: repo_path,
          absolute: true,
          ignore
        });
        for (const match of matches) {
          found.add(match);
        }
      }

      const uniqueFiles = [...found].slice(0, limit);

      if (cachingEnabled) {
        this.cache.set(cacheKey, {
          data: uniqueFiles,
          timestamp: Date.now()
        });
      }
      return [...uniqueFiles];
    } catch (error) {
      console.warn(`Error finding files for pattern ${patternType}: ${error.message}`);
      return [];
    }
  }

  /**
   * Finds the files for a category and extracts patterns from each of them.
   *
   * When parallel processing is enabled, files are handled in batches of
   * `getMaxConcurrentFiles()`; a value that is not a number >= 1 falls back
   * to a batch size of 1 so the loop always makes progress.
   *
   * @param {string} repo_path - Repository root.
   * @param {string} patternType - Pattern category (see findFilesByPattern).
   * @returns {Promise<object[]>} One entry per file that yielded patterns;
   *   failures are logged and skipped.
   */
  async scanForPatterns(repo_path, patternType) {
    const patterns = [];
    try {
      const files = await this.findFilesByPattern(repo_path, patternType);

      if (this.config.shouldEnableParallelProcessing()) {
        const configured = Number(this.config.getMaxConcurrentFiles());
        const batchSize = configured >= 1 ? Math.floor(configured) : 1;

        for (let start = 0; start < files.length; start += batchSize) {
          const batch = files.slice(start, start + batchSize);
          const settled = await Promise.allSettled(
            batch.map(file => this.processFile(file, repo_path, patternType))
          );
          settled.forEach((result, index) => {
            if (result.status === 'fulfilled' && result.value) {
              patterns.push(result.value);
            } else if (result.status === 'rejected') {
              console.warn(`Failed to process file ${batch[index]}: ${result.reason}`);
            }
          });
        }
      } else {
        for (const file of files) {
          try {
            const result = await this.processFile(file, repo_path, patternType);
            if (result) {
              patterns.push(result);
            }
          } catch (error) {
            console.warn(`Failed to process file ${file}: ${error.message}`);
          }
        }
      }
    } catch (error) {
      console.warn(`Could not scan for ${patternType} patterns: ${error.message}`);
    }
    return patterns;
  }

  /**
   * Reads one file and extracts its patterns.
   *
   * @param {string} file - Absolute path of the file to read.
   * @param {string} repo_path - Repository root, used for the relative path.
   * @param {string} patternType - Pattern category passed to extractPatterns.
   * @returns {Promise<object|null>} `{ file, fullPath, patterns, size,
   *   lastModified }`, or `null` when the file cannot be read or extraction
   *   produced nothing. `size` is the decoded string length.
   */
  async processFile(file, repo_path, patternType) {
    try {
      const content = await fs.readFile(file, 'utf8');
      const extractedPatterns = this.extractPatterns(content, patternType);
      if (extractedPatterns && Object.keys(extractedPatterns).length > 0) {
        return {
          file: path.relative(repo_path, file),
          fullPath: file,
          patterns: extractedPatterns,
          size: content.length,
          lastModified: (await fs.stat(file)).mtime
        };
      }
    } catch (error) {
      console.warn(`Could not read file ${file}: ${error.message}`);
    }
    return null;
  }

  /**
   * Dispatches to the extractor that matches `patternType`.
   *
   * @param {string} content - File contents.
   * @param {string} patternType - Pattern category.
   * @returns {object} Object with a single category key (`features`,
   *   `stepDefinitions`, `pageObjects`, `dataComponents`, `utilities` or
   *   `general`); empty if the extractor threw.
   */
  extractPatterns(content, patternType) {
    const patterns = {};
    try {
      switch (patternType) {
        case 'features':
          patterns.features = this.extractFeaturePatterns(content);
          break;
        case 'steps':
          patterns.stepDefinitions = this.extractStepPatterns(content);
          break;
        case 'pages':
          patterns.pageObjects = this.extractPagePatterns(content);
          break;
        case 'components':
          patterns.dataComponents = this.extractDataPatterns(content);
          break;
        case 'utils':
          patterns.utilities = this.extractUtilityPatterns(content);
          break;
        default:
          patterns.general = this.extractGeneralPatterns(content);
      }
    } catch (error) {
      console.warn(`Error extracting patterns for ${patternType}: ${error.message}`);
    }
    return patterns;
  }

  /**
   * Extracts scenarios, tags, steps, the background and example tables from
   * Gherkin text.
   *
   * @param {string} content - Contents of a `.feature` file.
   * @returns {{scenarios: string[], tags: string[], steps: string[],
   *   background: (string|null), examples: string[]}}
   */
  extractFeaturePatterns(content) {
    const patterns = {
      scenarios: [],
      tags: [],
      steps: [],
      background: null,
      examples: []
    };

    // Scenario and Scenario Outline titles.
    const scenarioMatches = content.match(/(?:Scenario|Scenario Outline):\s*(.+)/g) || [];
    patterns.scenarios = scenarioMatches.map(match =>
      match.replace(/(?:Scenario|Scenario Outline):\s*/, '').trim()
    );

    // Unique tags, in order of first appearance.
    patterns.tags = [...new Set(content.match(/@[\w-]+/g) || [])];

    // Steps must start a line; otherwise "And"/"But" inside a scenario title
    // or free text would be reported as steps.
    const stepMatches = content.match(/^[ \t]*(?:Given|When|Then|And|But)[ \t]+.+/gm) || [];
    patterns.steps = stepMatches.map(step => step.trim());

    // A block ends at the next Scenario/Feature line or at end of input.
    // The `\s*$` alternative keeps the last block of a file that has no
    // trailing newline.
    const backgroundMatch = content.match(
      /Background:\s*\n([\s\S]*?)(?=\n\s*(?:Scenario|Feature)|\s*$)/
    );
    if (backgroundMatch) {
      patterns.background = backgroundMatch[1].trim();
    }

    // Example tables additionally end at the next "Examples:" header so
    // consecutive tables are reported separately.
    const exampleMatches = content.match(
      /Examples:\s*\n([\s\S]*?)(?=\n\s*(?:Scenario|Feature|Examples:)|\s*$)/g
    ) || [];
    patterns.examples = exampleMatches.map(example => example.trim());

    return patterns;
  }

  /**
   * Extracts step definitions, import statements and hook names from a
   * step-definition source file.
   *
   * Step text may contain quote characters other than the delimiter, and the
   * callback may be an arrow function or a `function` expression.
   * `exports` and `parameters` are part of the result shape but are not
   * populated by this method.
   *
   * @param {string} content - Source text.
   * @returns {{stepDefinitions: Array<{type: string, pattern: string,
   *   hasParameters: boolean}>, imports: string[], exports: Array,
   *   parameters: Array, hooks: string[]}}
   */
  extractStepPatterns(content) {
    const patterns = {
      stepDefinitions: [],
      imports: [],
      exports: [],
      parameters: [],
      hooks: []
    };

    // Group 2 is the opening quote and `\2` closes the literal, so the other
    // quote kinds are allowed inside the step text.
    const stepDefRegex =
      /(Given|When|Then|And|But)\s*\(\s*(['"`])((?:\\.|(?!\2)[^\\])+)\2\s*,?\s*(?:async\s*)?(?:function\b\s*\w*\s*\([^)]*\)|\([^)]*\)\s*=>)/g;
    for (const match of content.matchAll(stepDefRegex)) {
      const stepText = match[3];
      patterns.stepDefinitions.push({
        type: match[1],
        pattern: stepText,
        hasParameters: stepText.includes('{') || stepText.includes('(')
      });
    }

    // Single-line import statements.
    patterns.imports = content.match(/import\s+.*?from\s+['"](.+?)['"];?/g) || [];

    // Hook registrations; the word boundary rejects names such as `runBefore(`.
    patterns.hooks = [...content.matchAll(/\b(BeforeAll|AfterAll|Before|After)\s*\(/g)]
      .map(match => match[1]);

    return patterns;
  }

  /**
   * Extracts class name, base class, getter-backed selectors, selector-like
   * string literals, method names and action methods from a page object.
   *
   * @param {string} content - Source text.
   * @returns {{selectors: Map<string, string>, methods: string[],
   *   className: (string|null), inheritance: (string|null),
   *   getters: string[], actions: string[]}}
   */
  extractPagePatterns(content) {
    const patterns = {
      selectors: new Map(),
      methods: [],
      className: null,
      inheritance: null,
      getters: [],
      actions: []
    };

    // First class declaration and its optional base class.
    const classMatch = content.match(/class\s+(\w+)(?:\s+extends\s+(\w+))?/);
    if (classMatch) {
      patterns.className = classMatch[1];
      patterns.inheritance = classMatch[2] || null;
    }

    // Getters whose body returns something containing a quoted string; the
    // first quoted string is taken as the selector. The back-reference keeps
    // selectors such as [data-test="x"] intact.
    const quotedString = /(['"`])((?:\\.|(?!\1)[^\\])+)\1/;
    for (const [, getterName, body] of content.matchAll(/get\s+(\w+)\s*\(\s*\)\s*{\s*return\s+([^}]+)/g)) {
      const selectorMatch = body.match(quotedString);
      if (selectorMatch) {
        patterns.selectors.set(getterName, selectorMatch[2]);
        patterns.getters.push(getterName);
      }
    }

    // Walk every single-line string literal in order, so a closing quote is
    // never mistaken for an opening one, and keep those that look like
    // selectors.
    const stringLiteral = /(['"`])((?:\\.|(?!\1)[^\\\n])*)\1/g;
    for (const [, quote, text] of content.matchAll(stringLiteral)) {
      if (quote === '`') {
        continue; // template literals are only tokenised, not recorded
      }
      const looksLikeSelector = /^(?:[#.\[]|data-testid)./.test(text);
      const isRelativePath = /^\.{1,2}\//.test(text);
      if (looksLikeSelector && !isRelativePath && !patterns.selectors.has(text)) {
        patterns.selectors.set(text, text);
      }
    }

    // `name(...) {` also matches control-flow statements, so those keywords
    // are filtered out together with the constructor.
    const notMethods = new Set([
      'constructor', 'if', 'for', 'while', 'switch', 'catch', 'function', 'with'
    ]);
    const actionHints = ['click', 'press', 'submit', 'select'];
    for (const [, methodName] of content.matchAll(/(?:async\s+)?(\w+)\s*\([^)]*\)\s*{/g)) {
      if (notMethods.has(methodName)) {
        continue;
      }
      patterns.methods.push(methodName);
      // Case-insensitive so camelCase names such as `doClick` are recognised.
      const lowered = methodName.toLowerCase();
      if (actionHints.some(hint => lowered.includes(hint))) {
        patterns.actions.push(methodName);
      }
    }

    return patterns;
  }

  /**
   * Extracts exported object names, assigned object literals and their
   * string-valued properties from a data/component module.
   *
   * Object bodies are matched up to the first `}`, so nested objects are cut
   * short. `testData` is keyed by property name only, so a later object
   * overwrites an earlier one that uses the same key. `constants` and
   * `configurations` are not populated by this method.
   *
   * @param {string} content - Source text.
   * @returns {{dataObjects: string[], constants: Array, exports: string[],
   *   configurations: Array, testData: Map<string, string>}}
   */
  extractDataPatterns(content) {
    const patterns = {
      dataObjects: [],
      constants: [],
      exports: [],
      configurations: [],
      testData: new Map()
    };

    // Names of `export const|let|var name = {` declarations.
    const exportMatches = content.match(/export\s+(?:const|let|var)\s+(\w+)\s*=\s*{/g) || [];
    patterns.exports = exportMatches.map(match =>
      match.replace(/export\s+(?:const|let|var)\s+/, '').replace(/\s*=\s*{/, '').trim()
    );

    // Every `name = { ... }` assignment and its `key: 'value'` pairs.
    for (const [, objectName, objectBody] of content.matchAll(/(\w+)\s*=\s*{([^}]*)}/g)) {
      patterns.dataObjects.push(objectName);
      for (const [, key, value] of objectBody.matchAll(/(\w+)\s*:\s*['"](.*?)['"]/g)) {
        patterns.testData.set(key, value);
      }
    }

    return patterns;
  }

  /**
   * Extracts function declarations and function-valued variable declarations
   * and groups them into coarse categories by name.
   *
   * `classes`, `exports` and `dependencies` are part of the result shape but
   * are not populated by this method.
   *
   * @param {string} content - Source text.
   * @returns {{functions: Array<{name: string, isAsync: boolean,
   *   isExported: boolean}>, classes: Array, exports: Array,
   *   dependencies: Array, categories: Map<string, string[]>}}
   */
  extractUtilityPatterns(content) {
    const patterns = {
      functions: [],
      classes: [],
      exports: [],
      dependencies: [],
      categories: new Map()
    };

    // Checked in order; the first rule with a matching substring wins.
    const categoryRules = [
      ['timing', ['wait', 'timeout']],
      ['validation', ['validate', 'check', 'verify']],
      ['formatting', ['format', 'parse', 'convert']],
      ['retrieval', ['get', 'find', 'search']]
    ];

    const functionRegex = /(?:export\s+)?(?:async\s+)?(?:function\s+(\w+)|(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(?:function|\([^)]*\)\s*=>))/g;
    for (const match of content.matchAll(functionRegex)) {
      const functionName = match[1] || match[2];
      if (!functionName) {
        continue;
      }
      const declaration = match[0];
      patterns.functions.push({
        name: functionName,
        // Keyword tests, not substring tests: a function called `asyncHelper`
        // or `exportReport` must not be flagged by its own name.
        isAsync: /\basync\b/.test(declaration),
        isExported: /^export\b/.test(declaration)
      });

      const lowered = functionName.toLowerCase();
      const rule = categoryRules.find(([, hints]) => hints.some(hint => lowered.includes(hint)));
      this.addToCategory(patterns.categories, rule ? rule[0] : 'general', functionName);
    }

    return patterns;
  }

  /**
   * Fallback extractor: collects raw matched text for imports, exports,
   * functions, classes and variable declarations.
   *
   * @param {string} content - Source text.
   * @returns {{imports: string[], exports: string[], functions: string[],
   *   classes: string[], variables: string[]}}
   */
  extractGeneralPatterns(content) {
    return {
      imports: content.match(/import\s+.*?from\s+['"](.+?)['"];?/g) || [],
      // Whole export statement up to end of line; the previous lazy `.*?`
      // only ever yielded the bare keyword.
      exports: (content.match(/\bexport\s+.+/g) || []).map(statement => statement.trim()),
      // Declarations (`function name`) plus function-valued variables
      // (`const name = function` / `const name = (...) =>`).
      functions: content.match(
        /(?:function\s+\w+|(?:const|let|var)\s+\w+\s*=\s*(?:async\s+)?(?:function\b|\([^)]*\)\s*=>))/g
      ) || [],
      classes: content.match(/class\s+(\w+)/g) || [],
      variables: content.match(/(?:const|let|var)\s+(\w+)/g) || []
    };
  }

  /**
   * Appends `item` to the list stored under `category`, creating the list on
   * first use.
   *
   * @param {Map<string, Array>} categoriesMap - Map that is mutated in place.
   * @param {string} category - Category name.
   * @param {*} item - Value to append.
   */
  addToCategory(categoriesMap, category, item) {
    if (!categoriesMap.has(category)) {
      categoriesMap.set(category, []);
    }
    categoriesMap.get(category).push(item);
  }

  /**
   * Scores the similarity of two pattern objects as a weighted sum of
   * structure (shared keys, weight 0.3), content (overlap of array values
   * under shared keys, weight 0.4) and naming (fixed placeholder of 0.5,
   * weight 0.3).
   *
   * The result is always a finite number. Missing inputs are treated as
   * empty objects. A shared key whose arrays are both empty carries no
   * evidence and is left out of the content average. Object elements are
   * compared by their JSON form, so structurally equal entries from two
   * files count as overlap.
   *
   * @param {object} file1Patterns - Pattern object of the first file.
   * @param {object} file2Patterns - Pattern object of the second file.
   * @returns {number} Similarity score.
   */
  calculateFileSimilarity(file1Patterns, file2Patterns) {
    const weights = {
      structure: 0.3,
      content: 0.4,
      naming: 0.3
    };

    const first = file1Patterns ?? {};
    const second = file2Patterns ?? {};
    const firstKeys = Object.keys(first);
    const secondKeys = new Set(Object.keys(second));
    const commonKeys = firstKeys.filter(key => secondKeys.has(key));

    // Structure: share of keys present in both objects.
    const largestKeyCount = Math.max(firstKeys.length, secondKeys.size);
    const structureScore = largestKeyCount === 0 ? 0 : commonKeys.length / largestKeyCount;

    // Content: average overlap ratio across shared keys. Non-array values
    // stay in the denominator and contribute 0.
    const comparableKey = item => (
      item !== null && typeof item === 'object'
        ? `\u0000json:${JSON.stringify(item)}`
        : item
    );
    let overlapTotal = 0;
    let comparedKeys = commonKeys.length;
    for (const key of commonKeys) {
      const left = first[key];
      const right = second[key];
      if (!Array.isArray(left) || !Array.isArray(right)) {
        continue;
      }
      const longest = Math.max(left.length, right.length);
      if (longest === 0) {
        comparedKeys -= 1; // avoids 0/0 and skips the uninformative pair
        continue;
      }
      const rightKeys = new Set(right.map(comparableKey));
      const shared = left.filter(item => rightKeys.has(comparableKey(item))).length;
      overlapTotal += shared / longest;
    }
    const contentScore = comparedKeys > 0 ? overlapTotal / comparedKeys : 0;

    // Naming: simplified implementation, fixed middle score.
    const namingScore = 0.5;

    return (structureScore * weights.structure) +
      (contentScore * weights.content) +
      (namingScore * weights.naming);
  }

  /** Drops every cached file list. */
  clearCache() {
    this.cache.clear();
  }
}