// pattern-extractor.js • 21.2 kB
/**
* Pattern Extractor Module
* Specialized pattern recognition and extraction capabilities
*/
import { createHash } from 'crypto';
import path from 'path';
import fs from 'fs-extra';
/**
 * Regex-based extractor that pulls structural "patterns" (Gherkin features and
 * steps, step definitions, page-object selectors/methods, data objects) out of
 * source text and attaches simple heuristic metrics.
 *
 * NOTE(review): many helpers invoked by this class (for example
 * `extractBackgrounds`, `extractScenarioOutlines`, `extractImports`,
 * `extractFeatureDescription`, `extractScenarioSteps`, `categorizeStep`,
 * `estimateMethodComplexity`, `parseObjectProperties`, `analyzeDataTypes`,
 * `categorizeComplexities`) are not defined anywhere in this class. Unless they
 * are supplied elsewhere (subclass or prototype extension), the code paths
 * that call them throw a TypeError — confirm where they are meant to live.
 */
export class PatternExtractor {
  /**
   * @param {{ get: (key: string) => unknown }} config - Configuration object;
   *   `config.get('performance.enableCaching')` is read by `extractPatterns`.
   */
  constructor(config) {
    this.config = config;
    this.patternCache = new Map();
    this.extractionRules = this.initializeExtractionRules();
  }

  /**
   * Builds the table of regex rules grouped by category.
   *
   * All patterns carry the `g` flag, so callers must not run `exec`/`test`
   * on them directly (that would leave `lastIndex` state behind); use
   * `String.prototype.matchAll` or a fresh `RegExp` copy instead.
   *
   * @returns {object} Rule arrays keyed by `step_patterns`,
   *   `selector_patterns`, `page_object_patterns` and `data_patterns`.
   */
  initializeExtractionRules() {
    return {
      step_patterns: [
        { pattern: /(Given|When|Then|And|But)\s+I\s+(.*)/g, type: 'user_action' },
        { pattern: /(Given|When|Then|And|But)\s+the\s+(.*)/g, type: 'system_state' },
        { pattern: /(Given|When|Then|And|But)\s+.*should\s+(.*)/g, type: 'assertion' },
        { pattern: /(Given|When|Then|And|But)\s+.*click\s+(.*)/g, type: 'interaction' },
        { pattern: /(Given|When|Then|And|But)\s+.*enter\s+(.*)/g, type: 'data_input' }
      ],
      selector_patterns: [
        { pattern: /\[data-testid=['"](.*?)['\"]\]/g, type: 'data_testid', reliability: 0.9 },
        { pattern: /#([a-zA-Z][\w-]*)/g, type: 'id', reliability: 0.8 },
        { pattern: /\.([a-zA-Z][\w-]*)/g, type: 'class', reliability: 0.6 },
        { pattern: /\[([a-zA-Z-]+)([=~|^$*]?)['"](.*?)['\"]\]/g, type: 'attribute', reliability: 0.7 },
        { pattern: /^([a-zA-Z][a-zA-Z0-9]*(?:-[a-zA-Z0-9]+)*)$/g, type: 'tag', reliability: 0.4 }
      ],
      page_object_patterns: [
        { pattern: /get\s+(\w+)\(\)\s*{\s*return\s+\$\((['"`])([^'"`]+)\2\);\s*}/g, type: 'getter' },
        { pattern: /async\s+(\w+)\([^)]*\)\s*{/g, type: 'async_method' },
        { pattern: /(\w+)\s*:\s*(['"`])([^'"`]+)\2/g, type: 'property' },
        { pattern: /class\s+(\w+)(?:\s+extends\s+(\w+))?\s*{/g, type: 'class_definition' }
      ],
      data_patterns: [
        { pattern: /export\s+const\s+(\w+)\s*=\s*{([^}]+)}/g, type: 'data_object' },
        { pattern: /(\w+):\s*(['"`])([^'"`]+)\2/g, type: 'string_property' },
        { pattern: /(\w+):\s*(\d+(?:\.\d+)?)/g, type: 'numeric_property' },
        { pattern: /(\w+):\s*(true|false)/g, type: 'boolean_property' }
      ]
    };
  }

  /**
   * Main entry point: extracts patterns from `content`, serving a cached
   * result when one exists for the same content and file type.
   *
   * New results are stored only when `performance.enableCaching` is truthy;
   * an existing cache entry is returned regardless of that setting.
   *
   * @param {string} content - Source text to analyse.
   * @param {string} [fileType='auto'] - 'feature', 'steps', 'page', 'data',
   *   'javascript', or 'auto' to detect the type from the content.
   * @returns {Promise<object>} Extraction result (see `performExtraction`).
   * @throws {TypeError} If `content` is not a string.
   */
  async extractPatterns(content, fileType = 'auto') {
    if (typeof content !== 'string') {
      throw new TypeError('PatternExtractor.extractPatterns: content must be a string');
    }

    const cacheKey = this.generateCacheKey(content, fileType);
    if (this.patternCache.has(cacheKey)) {
      return this.patternCache.get(cacheKey);
    }

    const patterns = await this.performExtraction(content, fileType);
    if (this.config.get('performance.enableCaching')) {
      this.patternCache.set(cacheKey, patterns);
    }
    return patterns;
  }

  /**
   * Runs the extraction for the resolved file type, bypassing the cache.
   *
   * @param {string} content - Source text to analyse.
   * @param {string} fileType - Explicit type, or 'auto' to detect it.
   * @returns {Promise<object>} `{ file_type, extraction_timestamp, patterns, metadata }`.
   */
  async performExtraction(content, fileType) {
    const detectedType = fileType === 'auto' ? this.detectFileType(content) : fileType;
    const patterns = {
      file_type: detectedType,
      extraction_timestamp: new Date().toISOString(),
      patterns: {},
      metadata: {
        content_length: content.length,
        line_count: content.split('\n').length,
        complexity_score: this.calculateContentComplexity(content)
      }
    };

    // Dispatch on the resolved type; anything unrecognised gets generic stats.
    switch (detectedType) {
      case 'feature':
        patterns.patterns = await this.extractGherkinPatterns(content);
        break;
      case 'steps':
        patterns.patterns = await this.extractStepPatterns(content);
        break;
      case 'page':
        patterns.patterns = await this.extractPageObjectPatterns(content);
        break;
      case 'data':
        patterns.patterns = await this.extractDataPatterns(content);
        break;
      case 'javascript':
        patterns.patterns = await this.extractJavaScriptPatterns(content);
        break;
      default:
        patterns.patterns = await this.extractGenericPatterns(content);
    }
    return patterns;
  }

  /**
   * Guesses the file type from the content. Types are tried in declaration
   * order and the first type with any matching indicator wins.
   *
   * The Gherkin indicators are anchored to the start of a line: an unanchored
   * `Given|When|Then` also matches step-definition files (`Given(...)`) and,
   * because 'feature' is tried first, would classify them as features.
   *
   * @param {string} content - Source text to inspect.
   * @returns {string} 'feature', 'steps', 'page', 'data', 'javascript' or 'unknown'.
   */
  detectFileType(content) {
    const indicators = {
      feature: [
        /^[ \t]*Feature:/m,
        /^[ \t]*Scenario(?:\s+Outline)?:/m,
        /^[ \t]*(?:Given|When|Then)\s+\S/m
      ],
      steps: [/Given\(/, /When\(/, /Then\(/, /@wdio\/cucumber-framework/],
      page: [/class\s+\w+Page/, /get\s+\w+\(\)\s*{/, /Page\s*{/],
      data: [/export\s+const\s+\w+\s*=\s*{/, /\.data\.js/, /testData/],
      javascript: [/import\s+/, /export\s+/, /function\s+/, /class\s+/]
    };

    for (const [type, typeIndicators] of Object.entries(indicators)) {
      if (typeIndicators.some((indicator) => indicator.test(content))) {
        return type;
      }
    }
    return 'unknown';
  }

  /**
   * Collects Gherkin structures from a feature file and derives summary metrics.
   * Relies on helpers that are not defined in this class (see class note).
   *
   * @param {string} content - Feature file text.
   * @returns {Promise<object>} Extracted collections plus an `analysis` object.
   */
  async extractGherkinPatterns(content) {
    const patterns = {
      features: this.extractFeatures(content),
      scenarios: this.extractScenarios(content),
      steps: this.extractGherkinSteps(content),
      tags: this.extractTags(content),
      tables: this.extractDataTables(content),
      backgrounds: this.extractBackgrounds(content),
      scenario_outlines: this.extractScenarioOutlines(content)
    };
    patterns.analysis = {
      total_scenarios: patterns.scenarios.length,
      total_steps: patterns.steps.length,
      step_distribution: this.analyzeStepDistribution(patterns.steps),
      complexity_metrics: this.calculateGherkinComplexity(patterns),
      reusability_score: this.calculateGherkinReusability(patterns)
    };
    return patterns;
  }

  /**
   * Collects patterns from a step-definition file and derives summary metrics.
   * Relies on helpers that are not defined in this class (see class note).
   *
   * @param {string} content - Step-definition source text.
   * @returns {Promise<object>} Extracted collections plus an `analysis` object.
   */
  async extractStepPatterns(content) {
    const patterns = {
      step_definitions: this.extractStepDefinitions(content),
      imports: this.extractImports(content),
      helper_functions: this.extractHelperFunctions(content),
      assertions: this.extractAssertions(content),
      waits: this.extractWaitPatterns(content),
      data_handling: this.extractDataHandlingPatterns(content)
    };
    patterns.analysis = {
      total_definitions: patterns.step_definitions.length,
      complexity_distribution: this.analyzeStepComplexity(patterns.step_definitions),
      reusability_metrics: this.analyzeStepReusability(patterns.step_definitions),
      best_practices: this.analyzeStepBestPractices(patterns)
    };
    return patterns;
  }

  /**
   * Collects patterns from a page-object file and derives summary metrics.
   * Relies on helpers that are not defined in this class (see class note).
   *
   * @param {string} content - Page-object source text.
   * @returns {Promise<object>} Extracted collections plus an `analysis` object.
   */
  async extractPageObjectPatterns(content) {
    const patterns = {
      class_definition: this.extractClassDefinition(content),
      selectors: this.extractPageSelectors(content),
      methods: this.extractPageMethods(content),
      properties: this.extractPageProperties(content),
      inheritance: this.extractInheritanceInfo(content),
      constants: this.extractConstants(content)
    };
    patterns.analysis = {
      structure_score: this.analyzePageStructure(patterns),
      selector_quality: this.analyzeSelectorQuality(patterns.selectors),
      method_complexity: this.analyzeMethodComplexity(patterns.methods),
      maintainability_score: this.calculateMaintainabilityScore(patterns)
    };
    return patterns;
  }

  /**
   * Collects patterns from a test-data file and derives summary metrics.
   * Relies on helpers that are not defined in this class (see class note).
   *
   * @param {string} content - Data-module source text.
   * @returns {Promise<object>} Extracted collections plus an `analysis` object.
   */
  async extractDataPatterns(content) {
    const patterns = {
      data_objects: this.extractDataObjects(content),
      exports: this.extractExports(content),
      functions: this.extractDataFunctions(content),
      validators: this.extractValidators(content),
      transformers: this.extractDataTransformers(content)
    };
    patterns.analysis = {
      data_consistency: this.analyzeDataConsistency(patterns.data_objects),
      coverage_metrics: this.analyzeDataCoverage(patterns.data_objects),
      type_distribution: this.analyzeDataTypes(patterns.data_objects),
      validation_coverage: this.analyzeValidationCoverage(patterns)
    };
    return patterns;
  }

  /**
   * Collects patterns from a general JavaScript file and derives summary metrics.
   * Relies on helpers that are not defined in this class (see class note).
   *
   * @param {string} content - JavaScript source text.
   * @returns {Promise<object>} Extracted collections plus an `analysis` object.
   */
  async extractJavaScriptPatterns(content) {
    const patterns = {
      functions: this.extractFunctions(content),
      classes: this.extractClasses(content),
      imports: this.extractImports(content),
      exports: this.extractExports(content),
      async_patterns: this.extractAsyncPatterns(content),
      error_handling: this.extractErrorHandling(content)
    };
    patterns.analysis = {
      code_quality: this.analyzeCodeQuality(patterns),
      complexity_metrics: this.analyzeJavaScriptComplexity(patterns),
      best_practices: this.analyzeJavaScriptBestPractices(patterns)
    };
    return patterns;
  }

  /**
   * Fallback for unrecognised content: basic size statistics only.
   *
   * @param {string} content - Any text.
   * @returns {Promise<{lines: number, characters: number, words: number, basic_analysis: string}>}
   */
  async extractGenericPatterns(content) {
    // Count runs of non-whitespace so leading/trailing whitespace and empty
    // input do not contribute phantom empty "words".
    const words = content.match(/\S+/g) || [];
    return {
      lines: content.split('\n').length,
      characters: content.length,
      words: words.length,
      basic_analysis: 'Generic content analysis performed'
    };
  }

  // Specific extraction methods

  /**
   * Finds `Feature:` headers. Each match spans from the keyword up to (not
   * including) the first following Scenario/Background line, or to the end of
   * the content when there is none.
   *
   * @param {string} content - Feature file text.
   * @returns {Array<{title: string, line: number, description: *}>} `title` is
   *   the text on the `Feature:` line; `description` is whatever
   *   `extractFeatureDescription` returns for the whole matched span.
   */
  extractFeatures(content) {
    // Group 1: rest of the header line. Group 2: lazily, everything up to the
    // next section or the (whitespace-only) end of input.
    const featureRegex =
      /Feature:[ \t]*([^\r\n]*)(.*?)(?=\r?\n\s*(?:Scenario|Background)|\s*$)/gs;
    return Array.from(content.matchAll(featureRegex), (match) => ({
      title: match[1].trim(),
      line: this.getLineNumber(content, match.index),
      description: this.extractFeatureDescription(match[0])
    }));
  }

  /**
   * Finds `Scenario:` and `Scenario Outline:` sections. Each match spans from
   * the keyword up to the next Scenario/Feature/Background line, or to the end
   * of the content (so a final scenario without a trailing newline is kept).
   *
   * @param {string} content - Feature file text.
   * @returns {Array<{title: string, type: 'outline'|'scenario', line: number, steps: *}>}
   *   `steps` is whatever `extractScenarioSteps` returns for the matched span.
   */
  extractScenarios(content) {
    // Group 1 is present only for outlines, so the type comes from the keyword
    // itself rather than from the word "Outline" appearing anywhere in the body.
    const scenarioRegex =
      /Scenario(\s+Outline)?:[ \t]*([^\r\n]*)(.*?)(?=\r?\n\s*(?:Scenario|Feature|Background)|\s*$)/gs;
    return Array.from(content.matchAll(scenarioRegex), (match) => ({
      title: match[2].trim(),
      type: match[1] !== undefined ? 'outline' : 'scenario',
      line: this.getLineNumber(content, match.index),
      steps: this.extractScenarioSteps(match[0])
    }));
  }

  /**
   * Finds Gherkin step lines, i.e. lines that start (after optional
   * indentation) with Given/When/Then/And/But followed by text.
   *
   * @param {string} content - Feature file text.
   * @returns {Array<{type: string, text: string, line: number, category: *}>}
   *   `category` is whatever `categorizeStep(keyword, text)` returns.
   */
  extractGherkinSteps(content) {
    // Anchored to line starts so keywords inside titles or step text
    // (e.g. "Feature: Salt And pepper") are not reported as steps.
    const stepRegex = /^[ \t]*(Given|When|Then|And|But)[ \t]+(.+)/gm;
    return Array.from(content.matchAll(stepRegex), (match) => ({
      type: match[1],
      text: match[2].trim(),
      line: this.getLineNumber(content, match.index),
      category: this.categorizeStep(match[1], match[2])
    }));
  }

  /**
   * Collects the distinct `@tag` names in first-seen order.
   *
   * @param {string} content - Text to scan.
   * @returns {string[]} Unique tag names without the leading `@`.
   */
  extractTags(content) {
    const names = Array.from(content.matchAll(/@(\w+)/g), (match) => match[1]);
    return [...new Set(names)];
  }

  /**
   * Groups consecutive pipe-delimited rows into tables. A row is a line
   * containing `|...|`; a table ends when the following line does not start
   * with `|` (ignoring indentation) or when the content ends.
   *
   * @param {string} content - Feature file text.
   * @returns {Array<{headers: string[], rows: string[][], line: number}>}
   *   `line` is the 1-based line of the table's first row.
   */
  extractDataTables(content) {
    const lines = content.split('\n');
    const tables = [];
    let currentRows = [];
    let firstRowLine = 0;

    lines.forEach((lineText, index) => {
      const match = /\|(.+)\|/.exec(lineText);
      if (match === null) {
        return;
      }
      if (currentRows.length === 0) {
        firstRowLine = index + 1;
      }
      currentRows.push(match[1].split('|').map((cell) => cell.trim()));

      // Flush when the table cannot continue: either this is the last line or
      // the next line is not a table row.
      const nextLine = lines[index + 1];
      if (nextLine === undefined || !nextLine.trim().startsWith('|')) {
        tables.push({
          headers: currentRows[0],
          rows: currentRows.slice(1),
          line: firstRowLine
        });
        currentRows = [];
      }
    });
    return tables;
  }

  /**
   * Extracts step information from a step-definition file in two passes:
   * first every `step_patterns` rule is applied to the text, then actual
   * definition calls such as `Given('...')` or `When(/.../)` are collected.
   *
   * @param {string} content - Step-definition source text.
   * @returns {Array<object>} Rule hits (`type`, `keyword`, `pattern`,
   *   `full_match`, `line`) followed by definition entries
   *   (`type: 'definition'`, `keyword`, `pattern`, `line`, `regex_complexity`).
   */
  extractStepDefinitions(content) {
    const definitions = [];

    // matchAll works on a copy of each shared /g regex, so no lastIndex state
    // leaks between calls.
    for (const rule of this.extractionRules.step_patterns) {
      for (const match of content.matchAll(rule.pattern)) {
        definitions.push({
          type: rule.type,
          keyword: match[1],
          pattern: match[2],
          full_match: match[0],
          line: this.getLineNumber(content, match.index)
        });
      }
    }

    // The pattern must end with the same delimiter it started with; otherwise
    // a pattern such as 'I click "Login"' would be cut at the inner quote.
    const stepDefRegex =
      /(Given|When|Then|And|But)\s*\(\s*(['"`\/])((?:\\.|(?!\2).)*)\2/g;
    for (const match of content.matchAll(stepDefRegex)) {
      definitions.push({
        type: 'definition',
        keyword: match[1],
        pattern: match[3],
        line: this.getLineNumber(content, match.index),
        regex_complexity: this.calculateRegexComplexity(match[3])
      });
    }
    return definitions;
  }

  /**
   * Extracts selector candidates: every `selector_patterns` rule hit, followed
   * by getter-style selectors of the form `get name() { return $('...'); }`.
   *
   * @param {string} content - Page-object source text.
   * @returns {Array<object>} Rule hits (`type`, `value`, `reliability`, `line`,
   *   `context`) followed by getter entries (`type: 'getter'`, `name`,
   *   `selector`, `line`).
   */
  extractPageSelectors(content) {
    const selectors = [];

    for (const rule of this.extractionRules.selector_patterns) {
      for (const match of content.matchAll(rule.pattern)) {
        selectors.push({
          type: rule.type,
          value: match[1] || match[0],
          reliability: rule.reliability,
          line: this.getLineNumber(content, match.index),
          context: this.getContext(content, match.index)
        });
      }
    }

    const getterRegex = /get\s+(\w+)\(\)\s*{\s*return\s+\$\((['"`])([^'"`]+)\2\);?\s*}/g;
    for (const match of content.matchAll(getterRegex)) {
      selectors.push({
        type: 'getter',
        name: match[1],
        selector: match[3],
        line: this.getLineNumber(content, match.index)
      });
    }
    return selectors;
  }

  /**
   * Extracts method-like declarations (`name(params) {`), classifying each as
   * 'async' or 'regular'. Async methods are listed first, then regular ones,
   * each group in source order.
   *
   * Control-flow constructs that share the `word (...) {` shape (`if`, `for`,
   * `while`, `switch`, `catch`, `with`) and anonymous `function (...) {`
   * expressions are skipped.
   *
   * @param {string} content - Page-object source text.
   * @returns {Array<{name: string, parameters: string[], type: 'async'|'regular', line: number, complexity: *}>}
   *   `complexity` is whatever `estimateMethodComplexity` returns.
   */
  extractPageMethods(content) {
    const nonMethodKeywords = new Set([
      'if', 'for', 'while', 'switch', 'catch', 'with', 'function'
    ]);
    // One pass with an optional `async` prefix, so an async method can never
    // be matched a second time as a regular one.
    const methodRegex = /\b(?:(async)\s+)?(\w+)\s*\(([^)]*)\)\s*{/g;
    const asyncMethods = [];
    const regularMethods = [];

    for (const match of content.matchAll(methodRegex)) {
      const [, asyncKeyword, name, rawParameters] = match;
      const isAsync = asyncKeyword !== undefined;
      if (!isAsync && nonMethodKeywords.has(name)) {
        continue;
      }
      const method = {
        name,
        parameters: rawParameters
          .split(',')
          .map((parameter) => parameter.trim())
          .filter((parameter) => parameter),
        type: isAsync ? 'async' : 'regular',
        line: this.getLineNumber(content, match.index),
        complexity: this.estimateMethodComplexity(content, match.index)
      };
      (isAsync ? asyncMethods : regularMethods).push(method);
    }
    return [...asyncMethods, ...regularMethods];
  }

  /**
   * Extracts `export const NAME = { ... }` object literals, including nested
   * objects, and hands the literal text to `parseObjectProperties`.
   *
   * @param {string} content - Data-module source text.
   * @returns {Array<{name: string, properties: *, line: number, type_analysis: *}>}
   *   Objects whose parsing throws are skipped with a console warning.
   */
  extractDataObjects(content) {
    const objects = [];
    const headerRegex = /export\s+const\s+(\w+)\s*=\s*{/g;

    for (const match of content.matchAll(headerRegex)) {
      const openIndex = match.index + match[0].length - 1;
      let closeIndex = this.findMatchingBrace(content, openIndex);
      if (closeIndex === -1) {
        // Unbalanced input: fall back to the first closing brace, if any.
        closeIndex = content.indexOf('}', openIndex);
      }
      if (closeIndex === -1) {
        continue;
      }

      try {
        const name = match[1];
        const objectContent = content.slice(openIndex, closeIndex + 1);
        const properties = this.parseObjectProperties(objectContent);
        objects.push({
          name,
          properties,
          line: this.getLineNumber(content, match.index),
          type_analysis: this.analyzeDataTypes(properties)
        });
      } catch (error) {
        console.warn(`Failed to parse data object: ${error.message}`);
      }
    }
    return objects;
  }

  /**
   * Internal helper: returns the index of the `}` that closes the `{` at
   * `openIndex`, ignoring braces inside string literals. Comments are not
   * recognised; single- and double-quoted strings are assumed to end at the
   * end of their line so a stray apostrophe cannot swallow the rest of the text.
   *
   * @param {string} content - Text to scan.
   * @param {number} openIndex - Index of the opening brace.
   * @returns {number} Index of the matching closing brace, or -1 if none.
   */
  findMatchingBrace(content, openIndex) {
    let depth = 0;
    let quote = null;

    for (let i = openIndex; i < content.length; i += 1) {
      const char = content[i];
      if (quote !== null) {
        if (char === '\\') {
          i += 1; // Skip the escaped character.
        } else if (char === quote || (char === '\n' && quote !== '`')) {
          quote = null;
        }
      } else if (char === '"' || char === "'" || char === '`') {
        quote = char;
      } else if (char === '{') {
        depth += 1;
      } else if (char === '}') {
        depth -= 1;
        if (depth === 0) {
          return i;
        }
      }
    }
    return -1;
  }

  // Analysis and calculation methods

  /**
   * Counts steps per keyword.
   *
   * @param {Array<{type: string}>} steps - Steps as returned by `extractGherkinSteps`.
   * @returns {Object<string, number>} Counts keyed by keyword; the five Gherkin
   *   keywords are always present.
   */
  analyzeStepDistribution(steps) {
    const distribution = { Given: 0, When: 0, Then: 0, And: 0, But: 0 };
    for (const step of steps) {
      distribution[step.type] = (distribution[step.type] || 0) + 1;
    }
    return distribution;
  }

  /**
   * Derives ratio-based complexity metrics for a feature file.
   *
   * @param {{steps: Array, scenarios: Array<{type: string}>, tables: Array}} patterns
   * @returns {{step_complexity: number, outline_ratio: number, table_usage: number, overall_score: number}}
   *   `overall_score` is capped at 1.
   */
  calculateGherkinComplexity(patterns) {
    const totalSteps = patterns.steps.length;
    const totalScenarios = patterns.scenarios.length;
    const outlineCount = patterns.scenarios.filter((s) => s.type === 'outline').length;
    const tableCount = patterns.tables.length;
    // Guard the denominators so a file without scenarios yields 0, not NaN.
    const scenarioDivisor = Math.max(totalScenarios, 1);

    return {
      step_complexity: totalSteps / scenarioDivisor,
      outline_ratio: outlineCount / scenarioDivisor,
      table_usage: tableCount / scenarioDivisor,
      overall_score: Math.min((totalSteps + outlineCount * 2 + tableCount) / 10, 1)
    };
  }

  /**
   * Measures how often step texts repeat (compared case-insensitively).
   *
   * @param {{steps: Array<{text: string}>}} patterns
   * @returns {{reuse_ratio: number, unique_steps: number, total_steps: number, score: number}}
   */
  calculateGherkinReusability(patterns) {
    const stepTexts = patterns.steps.map((s) => s.text.toLowerCase());
    const uniqueSteps = new Set(stepTexts);
    const duplicateCount = stepTexts.length - uniqueSteps.size;

    return {
      reuse_ratio: duplicateCount / Math.max(stepTexts.length, 1),
      unique_steps: uniqueSteps.size,
      total_steps: stepTexts.length,
      score: duplicateCount > 0 ? duplicateCount / stepTexts.length : 0
    };
  }

  /**
   * Summarises the regex complexity of step definitions.
   *
   * @param {Array<{regex_complexity?: number, pattern?: string}>} stepDefinitions
   * @returns {{average: number, max: number, min: number, distribution: *}}
   *   `average`, `max` and `min` are 0 for an empty list; `distribution` is
   *   whatever `categorizeComplexities` returns.
   */
  analyzeStepComplexity(stepDefinitions) {
    const complexities = stepDefinitions.map(
      (step) => step.regex_complexity ?? this.calculateRegexComplexity(step.pattern || '')
    );
    const count = complexities.length;
    const total = complexities.reduce((sum, value) => sum + value, 0);

    return {
      average: total / Math.max(count, 1),
      max: complexities.reduce((highest, value) => Math.max(highest, value), 0),
      // Seed with Infinity rather than 0: complexities are never negative, so
      // a 0 seed would always win and hide the real minimum.
      min: count === 0
        ? 0
        : complexities.reduce((lowest, value) => Math.min(lowest, value), Infinity),
      distribution: this.categorizeComplexities(complexities)
    };
  }

  /**
   * Scores a pattern string by counting regex metacharacters, groups
   * (weighted double) and quantifiers.
   *
   * @param {string} pattern - Step pattern text.
   * @returns {number} Score capped at 1.
   */
  calculateRegexComplexity(pattern) {
    const specialChars = (pattern.match(/[.*+?^${}()|[\]\\]/g) || []).length;
    const groups = (pattern.match(/\(/g) || []).length;
    const quantifiers = (pattern.match(/[*+?{]/g) || []).length;
    return Math.min((specialChars + groups * 2 + quantifiers) / 10, 1);
  }

  // Utility methods

  /**
   * Converts a character index into a 1-based line number.
   *
   * @param {string} content - Full text.
   * @param {number} index - Character offset into `content`.
   * @returns {number} 1-based line number containing `index`.
   */
  getLineNumber(content, index) {
    return content.substring(0, index).split('\n').length;
  }

  /**
   * Returns the text surrounding an index.
   *
   * @param {string} content - Full text.
   * @param {number} index - Character offset into `content`.
   * @param {number} [contextSize=50] - Characters to include on each side.
   * @returns {string} Slice of `content` clamped to its bounds.
   */
  getContext(content, index, contextSize = 50) {
    const start = Math.max(0, index - contextSize);
    const end = Math.min(content.length, index + contextSize);
    return content.substring(start, end);
  }

  /**
   * Builds the cache key for a (content, fileType) pair.
   *
   * The key is a SHA-256 digest over the file type and the complete content.
   * A key derived from only a short content prefix would make different files
   * with the same opening characters share one cache entry.
   *
   * @param {string} content - Source text.
   * @param {string} fileType - Requested file type (including 'auto').
   * @returns {string} 64-character hexadecimal digest.
   */
  generateCacheKey(content, fileType) {
    return createHash('sha256')
      .update(`${fileType}\u0000`) // Separator keeps type and content unambiguous.
      .update(content)
      .digest('hex');
  }

  /**
   * Computes rough complexity indicators for arbitrary text.
   *
   * @param {string} content - Source text.
   * @returns {{lines: number, nesting_level: number, cyclomatic: number, overall: number}}
   *   `overall` is a weighted sum capped at 1.
   */
  calculateContentComplexity(content) {
    const lines = content.split('\n').length;
    const nestingLevel = this.calculateMaxNesting(content);
    const cyclomaticComplexity = this.calculateCyclomaticComplexity(content);

    return {
      lines,
      nesting_level: nestingLevel,
      cyclomatic: cyclomaticComplexity,
      overall: Math.min((lines / 1000 + nestingLevel / 10 + cyclomaticComplexity / 20), 1)
    };
  }

  /**
   * Returns the deepest curly-brace nesting level, counting every `{` and `}`
   * character (including any inside strings or comments).
   *
   * @param {string} content - Source text.
   * @returns {number} Maximum nesting depth reached.
   */
  calculateMaxNesting(content) {
    let maxNesting = 0;
    let currentNesting = 0;
    for (const char of content) {
      if (char === '{') {
        currentNesting += 1;
        maxNesting = Math.max(maxNesting, currentNesting);
      } else if (char === '}') {
        currentNesting -= 1;
      }
    }
    return maxNesting;
  }

  /**
   * Approximates cyclomatic complexity by counting textual occurrences of
   * branching constructs on top of a base value of 1.
   *
   * @param {string} content - Source text.
   * @returns {number} Estimated complexity (at least 1).
   */
  calculateCyclomaticComplexity(content) {
    const complexityPatterns = [
      /if\s*\(/g,
      /for\s*\(/g,
      /while\s*\(/g,
      /catch\s*\(/g,
      /\?\s*.*\s*:/g, // Ternary operator
      /&&/g,
      /\|\|/g
    ];

    // String.prototype.match with a /g regex resets lastIndex itself, so these
    // literals carry no state between iterations.
    return complexityPatterns.reduce((complexity, pattern) => {
      const matches = content.match(pattern);
      return matches ? complexity + matches.length : complexity;
    }, 1);
  }

  /** Removes every cached extraction result. */
  clearCache() {
    this.patternCache.clear();
  }

  /**
   * Reports the current cache contents.
   *
   * @returns {{size: number, keys: string[]}} Entry count and cache keys.
   */
  getCacheStats() {
    return {
      size: this.patternCache.size,
      keys: Array.from(this.patternCache.keys())
    };
  }
}