Skip to main content
Glama

Automation Script Generator MCP Server

pattern-extractor.js•21.2 kB
/**
 * Pattern Extractor Module
 * Specialized pattern recognition and extraction capabilities
 */
import crypto from 'crypto';
import path from 'path';

import fs from 'fs-extra';

/**
 * Regex-driven extractor that classifies a source text (Gherkin feature,
 * step definitions, page object, data module, generic JavaScript) and pulls
 * structural patterns plus simple metrics out of it.
 *
 * NOTE(review): this class calls many helper methods that are not defined in
 * the file as reviewed (for example `extractFeatureDescription`,
 * `extractScenarioSteps`, `categorizeStep`, `extractImports`,
 * `parseObjectProperties`, `analyzeDataTypes`, `categorizeComplexities`,
 * `estimateMethodComplexity`). Presumably they are supplied elsewhere
 * (subclass / mixin) — confirm before relying on the code paths that use them.
 */
export class PatternExtractor {
  /**
   * @param {{ get: function(string): * }} config - Configuration accessor;
   *   `config.get('performance.enableCaching')` is read to decide whether
   *   extraction results are cached.
   */
  constructor(config) {
    this.config = config;
    this.patternCache = new Map();
    this.extractionRules = this.initializeExtractionRules();
  }

  /**
   * Builds the table of regex rules used by the extraction methods.
   *
   * All patterns carry the `g` flag and are therefore stateful; consumers
   * must iterate over a fresh copy rather than the shared instance.
   *
   * @returns {{step_patterns: Object[], selector_patterns: Object[], page_object_patterns: Object[], data_patterns: Object[]}}
   */
  initializeExtractionRules() {
    return {
      step_patterns: [
        { pattern: /(Given|When|Then|And|But)\s+I\s+(.*)/g, type: 'user_action' },
        { pattern: /(Given|When|Then|And|But)\s+the\s+(.*)/g, type: 'system_state' },
        { pattern: /(Given|When|Then|And|But)\s+.*should\s+(.*)/g, type: 'assertion' },
        { pattern: /(Given|When|Then|And|But)\s+.*click\s+(.*)/g, type: 'interaction' },
        { pattern: /(Given|When|Then|And|But)\s+.*enter\s+(.*)/g, type: 'data_input' }
      ],
      selector_patterns: [
        { pattern: /\[data-testid=['"](.*?)['\"]\]/g, type: 'data_testid', reliability: 0.9 },
        { pattern: /#([a-zA-Z][\w-]*)/g, type: 'id', reliability: 0.8 },
        { pattern: /\.([a-zA-Z][\w-]*)/g, type: 'class', reliability: 0.6 },
        { pattern: /\[([a-zA-Z-]+)([=~|^$*]?)['"](.*?)['\"]\]/g, type: 'attribute', reliability: 0.7 },
        { pattern: /^([a-zA-Z][a-zA-Z0-9]*(?:-[a-zA-Z0-9]+)*)$/g, type: 'tag', reliability: 0.4 }
      ],
      page_object_patterns: [
        { pattern: /get\s+(\w+)\(\)\s*{\s*return\s+\$\((['"`])([^'"`]+)\2\);\s*}/g, type: 'getter' },
        { pattern: /async\s+(\w+)\([^)]*\)\s*{/g, type: 'async_method' },
        { pattern: /(\w+)\s*:\s*(['"`])([^'"`]+)\2/g, type: 'property' },
        { pattern: /class\s+(\w+)(?:\s+extends\s+(\w+))?\s*{/g, type: 'class_definition' }
      ],
      data_patterns: [
        { pattern: /export\s+const\s+(\w+)\s*=\s*{([^}]+)}/g, type: 'data_object' },
        { pattern: /(\w+):\s*(['"`])([^'"`]+)\2/g, type: 'string_property' },
        { pattern: /(\w+):\s*(\d+(?:\.\d+)?)/g, type: 'numeric_property' },
        { pattern: /(\w+):\s*(true|false)/g, type: 'boolean_property' }
      ]
    };
  }

  // Main pattern extraction method
  /**
   * Extracts patterns from `content`, serving a cached result when caching is
   * enabled in the configuration.
   *
   * @param {string} content - Raw text to analyse.
   * @param {string} [fileType='auto'] - One of 'feature', 'steps', 'page',
   *   'data', 'javascript', or 'auto' to detect the type from the content.
   * @returns {Promise<Object>} Extraction result (see `performExtraction`).
   */
  async extractPatterns(content, fileType = 'auto') {
    const cachingEnabled = Boolean(this.config.get('performance.enableCaching'));

    // The cache is only consulted when caching is on, so entries stored
    // before caching was switched off are never served.
    if (!cachingEnabled) {
      return this.performExtraction(content, fileType);
    }

    const cacheKey = this.generateCacheKey(content, fileType);
    if (this.patternCache.has(cacheKey)) {
      return this.patternCache.get(cacheKey);
    }

    const patterns = await this.performExtraction(content, fileType);
    this.patternCache.set(cacheKey, patterns);
    return patterns;
  }

  /**
   * Runs the type-specific extractor and wraps its output with metadata.
   *
   * @param {string} content - Raw text to analyse.
   * @param {string} fileType - Explicit type, or 'auto' to detect it.
   * @returns {Promise<{file_type: string, extraction_timestamp: string, patterns: Object, metadata: Object}>}
   */
  async performExtraction(content, fileType) {
    const detectedType = fileType === 'auto' ? this.detectFileType(content) : fileType;

    const patterns = {
      file_type: detectedType,
      extraction_timestamp: new Date().toISOString(),
      patterns: {},
      metadata: {
        content_length: content.length,
        line_count: content.split('\n').length,
        complexity_score: this.calculateContentComplexity(content)
      }
    };

    // Extract patterns based on file type
    switch (detectedType) {
      case 'feature':
        patterns.patterns = await this.extractGherkinPatterns(content);
        break;
      case 'steps':
        patterns.patterns = await this.extractStepPatterns(content);
        break;
      case 'page':
        patterns.patterns = await this.extractPageObjectPatterns(content);
        break;
      case 'data':
        patterns.patterns = await this.extractDataPatterns(content);
        break;
      case 'javascript':
        patterns.patterns = await this.extractJavaScriptPatterns(content);
        break;
      default:
        patterns.patterns = await this.extractGenericPatterns(content);
    }

    return patterns;
  }

  /**
   * Guesses the file type from indicator regexes; the first type with any
   * matching indicator wins.
   *
   * Step-definition indicators (`Given(` ...) are checked before the Gherkin
   * ones, and the Gherkin keyword indicator is anchored to the start of a
   * line, so a step-definition file is no longer classified as 'feature'
   * merely because it contains the word "Given".
   *
   * @param {string} content - Raw text to classify.
   * @returns {string} 'steps', 'feature', 'page', 'data', 'javascript' or 'unknown'.
   */
  detectFileType(content) {
    // Ordered from most to least specific; none of these use the `g` flag,
    // so `.test()` is stateless here.
    const indicators = [
      ['steps', [/Given\(/, /When\(/, /Then\(/, /@wdio\/cucumber-framework/]],
      ['feature', [/Feature:\s*/, /Scenario:\s*/, /^[ \t]*(?:Given|When|Then)\s+/m]],
      ['page', [/class\s+\w+Page/, /get\s+\w+\(\)\s*{/, /Page\s*{/]],
      ['data', [/export\s+const\s+\w+\s*=\s*{/, /\.data\.js/, /testData/]],
      ['javascript', [/import\s+/, /export\s+/, /function\s+/, /class\s+/]]
    ];

    for (const [type, patterns] of indicators) {
      if (patterns.some(pattern => pattern.test(content))) {
        return type;
      }
    }

    return 'unknown';
  }

  /**
   * Extracts Gherkin structures from a feature file and derives summary metrics.
   *
   * @param {string} content - Feature file text.
   * @returns {Promise<Object>} Extracted collections plus an `analysis` object.
   */
  async extractGherkinPatterns(content) {
    const patterns = {
      features: this.extractFeatures(content),
      scenarios: this.extractScenarios(content),
      steps: this.extractGherkinSteps(content),
      tags: this.extractTags(content),
      tables: this.extractDataTables(content),
      backgrounds: this.extractBackgrounds(content),
      scenario_outlines: this.extractScenarioOutlines(content)
    };

    patterns.analysis = {
      total_scenarios: patterns.scenarios.length,
      total_steps: patterns.steps.length,
      step_distribution: this.analyzeStepDistribution(patterns.steps),
      complexity_metrics: this.calculateGherkinComplexity(patterns),
      reusability_score: this.calculateGherkinReusability(patterns)
    };

    return patterns;
  }

  /**
   * Extracts patterns from a step-definition file and derives summary metrics.
   *
   * @param {string} content - Step-definition source text.
   * @returns {Promise<Object>} Extracted collections plus an `analysis` object.
   */
  async extractStepPatterns(content) {
    const patterns = {
      step_definitions: this.extractStepDefinitions(content),
      imports: this.extractImports(content),
      helper_functions: this.extractHelperFunctions(content),
      assertions: this.extractAssertions(content),
      waits: this.extractWaitPatterns(content),
      data_handling: this.extractDataHandlingPatterns(content)
    };

    patterns.analysis = {
      total_definitions: patterns.step_definitions.length,
      complexity_distribution: this.analyzeStepComplexity(patterns.step_definitions),
      reusability_metrics: this.analyzeStepReusability(patterns.step_definitions),
      best_practices: this.analyzeStepBestPractices(patterns)
    };

    return patterns;
  }

  /**
   * Extracts patterns from a page-object file and derives summary metrics.
   *
   * @param {string} content - Page-object source text.
   * @returns {Promise<Object>} Extracted collections plus an `analysis` object.
   */
  async extractPageObjectPatterns(content) {
    const patterns = {
      class_definition: this.extractClassDefinition(content),
      selectors: this.extractPageSelectors(content),
      methods: this.extractPageMethods(content),
      properties: this.extractPageProperties(content),
      inheritance: this.extractInheritanceInfo(content),
      constants: this.extractConstants(content)
    };

    patterns.analysis = {
      structure_score: this.analyzePageStructure(patterns),
      selector_quality: this.analyzeSelectorQuality(patterns.selectors),
      method_complexity: this.analyzeMethodComplexity(patterns.methods),
      maintainability_score: this.calculateMaintainabilityScore(patterns)
    };

    return patterns;
  }

  /**
   * Extracts patterns from a test-data module and derives summary metrics.
   *
   * @param {string} content - Data module source text.
   * @returns {Promise<Object>} Extracted collections plus an `analysis` object.
   */
  async extractDataPatterns(content) {
    const patterns = {
      data_objects: this.extractDataObjects(content),
      exports: this.extractExports(content),
      functions: this.extractDataFunctions(content),
      validators: this.extractValidators(content),
      transformers: this.extractDataTransformers(content)
    };

    patterns.analysis = {
      data_consistency: this.analyzeDataConsistency(patterns.data_objects),
      coverage_metrics: this.analyzeDataCoverage(patterns.data_objects),
      type_distribution: this.analyzeDataTypes(patterns.data_objects),
      validation_coverage: this.analyzeValidationCoverage(patterns)
    };

    return patterns;
  }

  /**
   * Extracts patterns from a generic JavaScript file and derives summary metrics.
   *
   * @param {string} content - JavaScript source text.
   * @returns {Promise<Object>} Extracted collections plus an `analysis` object.
   */
  async extractJavaScriptPatterns(content) {
    const patterns = {
      functions: this.extractFunctions(content),
      classes: this.extractClasses(content),
      imports: this.extractImports(content),
      exports: this.extractExports(content),
      async_patterns: this.extractAsyncPatterns(content),
      error_handling: this.extractErrorHandling(content)
    };

    patterns.analysis = {
      code_quality: this.analyzeCodeQuality(patterns),
      complexity_metrics: this.analyzeJavaScriptComplexity(patterns),
      best_practices: this.analyzeJavaScriptBestPractices(patterns)
    };

    return patterns;
  }

  /**
   * Fallback analysis for content of unknown type: basic size counters only.
   *
   * @param {string} content - Raw text.
   * @returns {Promise<{lines: number, characters: number, words: number, basic_analysis: string}>}
   */
  async extractGenericPatterns(content) {
    // Trim first so leading/trailing whitespace (or empty content) does not
    // contribute empty-string "words" to the count.
    const trimmed = content.trim();

    return {
      lines: content.split('\n').length,
      characters: content.length,
      words: trimmed === '' ? 0 : trimmed.split(/\s+/).length,
      basic_analysis: 'Generic content analysis performed'
    };
  }

  // Specific extraction methods

  /**
   * Finds `Feature:` blocks. Each match runs up to the next Scenario or
   * Background line, or to the end of the input.
   *
   * @param {string} content - Feature file text.
   * @returns {Array<{title: string, line: number, description: *}>}
   */
  extractFeatures(content) {
    // `|\s*$` lets a feature that runs to end-of-file match even when the
    // file has no trailing newline.
    const featureRegex = /Feature:\s*(.+?)(?=\n\s*(?:Scenario|Background)|\s*$)/gs;
    const features = [];
    let match;

    while ((match = featureRegex.exec(content)) !== null) {
      features.push({
        title: match[1].trim(),
        line: this.getLineNumber(content, match.index),
        description: this.extractFeatureDescription(match[0])
      });
    }

    return features;
  }

  /**
   * Finds `Scenario:` / `Scenario Outline:` blocks. Each match runs up to the
   * next Scenario, Feature or Background line, or to the end of the input.
   *
   * @param {string} content - Feature file text.
   * @returns {Array<{title: string, type: string, line: number, steps: *}>}
   */
  extractScenarios(content) {
    // `|\s*$` keeps the final scenario when the file lacks a trailing newline.
    const scenarioRegex = /Scenario(?:\s+Outline)?:\s*(.+?)(?=\n\s*(?:Scenario|Feature|Background)|\s*$)/gs;
    const scenarios = [];
    let match;

    while ((match = scenarioRegex.exec(content)) !== null) {
      const isOutline = match[0].includes('Outline');
      scenarios.push({
        title: match[1].trim(),
        type: isOutline ? 'outline' : 'scenario',
        line: this.getLineNumber(content, match.index),
        steps: this.extractScenarioSteps(match[0])
      });
    }

    return scenarios;
  }

  /**
   * Finds Gherkin step lines (Given / When / Then / And / But).
   *
   * @param {string} content - Feature file text.
   * @returns {Array<{type: string, text: string, line: number, category: *}>}
   */
  extractGherkinSteps(content) {
    // Anchored to the start of a line so that a keyword appearing inside
    // other text (e.g. "Feature: Login And logout") is not taken as a step.
    const stepRegex = /^[ \t]*(Given|When|Then|And|But)\s+(.+)/gm;
    const steps = [];
    let match;

    while ((match = stepRegex.exec(content)) !== null) {
      steps.push({
        type: match[1],
        text: match[2].trim(),
        line: this.getLineNumber(content, match.index),
        category: this.categorizeStep(match[1], match[2])
      });
    }

    return steps;
  }

  /**
   * Collects the distinct `@tag` names in first-seen order.
   *
   * @param {string} content - Feature file text.
   * @returns {string[]} Unique tag names without the leading `@`.
   */
  extractTags(content) {
    const rawTags = content.match(/@\w+/g) || [];
    return [...new Set(rawTags.map(tag => tag.slice(1)))]; // Remove duplicates
  }

  /**
   * Groups consecutive `| a | b |` lines into tables.
   *
   * A line belongs to a table when its trimmed text starts with `|` and
   * contains a closing `|`. The first row of each group is reported as the
   * header row.
   *
   * @param {string} content - Feature file text.
   * @returns {Array<{headers: string[], rows: string[][], line: number}>}
   *   `line` is the 1-based line of the table's first row.
   */
  extractDataTables(content) {
    const rowRegex = /\|(.+)\|/;
    const tables = [];
    let currentTable = [];
    let tableStartLine = 0;

    const flushTable = () => {
      if (currentTable.length > 0) {
        tables.push({
          headers: currentTable[0],
          rows: currentTable.slice(1),
          line: tableStartLine
        });
        currentTable = [];
      }
    };

    content.split('\n').forEach((rawLine, lineIndex) => {
      const text = rawLine.trim();
      const match = text.startsWith('|') ? rowRegex.exec(text) : null;

      if (match === null) {
        // Any non-row line terminates the table being collected.
        flushTable();
        return;
      }

      if (currentTable.length === 0) {
        tableStartLine = lineIndex + 1;
      }
      currentTable.push(match[1].split('|').map(cell => cell.trim()));
    });

    // A table that runs to the end of the input has no terminating line.
    flushTable();

    return tables;
  }

  /**
   * Extracts step phrases matching the `step_patterns` rules and, separately,
   * step-definition calls such as `Given('...')` or `When(/.../)`.
   *
   * @param {string} content - Step-definition source text.
   * @returns {Object[]} Rule matches (`type`, `keyword`, `pattern`,
   *   `full_match`, `line`) followed by definition matches (`type:
   *   'definition'`, `keyword`, `pattern`, `line`, `regex_complexity`).
   */
  extractStepDefinitions(content) {
    const rules = this.extractionRules.step_patterns;
    const definitions = [];

    rules.forEach(rule => {
      // Work on a copy: the shared rule regexes are global and keep
      // `lastIndex` state between calls.
      const pattern = new RegExp(rule.pattern.source, rule.pattern.flags);
      let match;
      while ((match = pattern.exec(content)) !== null) {
        definitions.push({
          type: rule.type,
          keyword: match[1],
          pattern: match[2],
          full_match: match[0],
          line: this.getLineNumber(content, match.index)
        });
      }
    });

    // Also extract actual step definitions with regex patterns
    const stepDefRegex = /(Given|When|Then|And|But)\s*\(\s*['"\/](.*?)['"\/]/g;
    let match;
    while ((match = stepDefRegex.exec(content)) !== null) {
      definitions.push({
        type: 'definition',
        keyword: match[1],
        pattern: match[2],
        line: this.getLineNumber(content, match.index),
        regex_complexity: this.calculateRegexComplexity(match[2])
      });
    }

    return definitions;
  }

  /**
   * Extracts selector-like strings using the `selector_patterns` rules, plus
   * getter-style selectors of the form `get name() { return $('...'); }`.
   *
   * @param {string} content - Page-object source text.
   * @returns {Object[]} Rule matches (`type`, `value`, `reliability`, `line`,
   *   `context`) followed by getter matches (`type: 'getter'`, `name`,
   *   `selector`, `line`).
   */
  extractPageSelectors(content) {
    const selectors = [];
    const rules = this.extractionRules.selector_patterns;

    rules.forEach(rule => {
      const pattern = new RegExp(rule.pattern.source, 'g');
      let match;
      while ((match = pattern.exec(content)) !== null) {
        selectors.push({
          type: rule.type,
          value: match[1] || match[0],
          reliability: rule.reliability,
          line: this.getLineNumber(content, match.index),
          context: this.getContext(content, match.index)
        });
      }
    });

    // Extract getter-style selectors
    const getterRegex = /get\s+(\w+)\(\)\s*{\s*return\s+\$\((['"`])([^'"`]+)\2\);?\s*}/g;
    let match;
    while ((match = getterRegex.exec(content)) !== null) {
      selectors.push({
        type: 'getter',
        name: match[1],
        selector: match[3],
        line: this.getLineNumber(content, match.index)
      });
    }

    return selectors;
  }

  /**
   * Extracts method-like declarations (`name(args) {`), classifying each as
   * 'async' or 'regular'.
   *
   * A single pass with an optional `async` prefix guarantees that an async
   * method is reported once; control-flow statements such as `if (...) {` are
   * skipped.
   *
   * @param {string} content - Page-object source text.
   * @returns {Object[]} Async methods first, then regular ones; each entry
   *   has `name`, `parameters`, `type`, `line`, `complexity`.
   */
  extractPageMethods(content) {
    // Keywords that look like `name(...) {` but are not method declarations.
    const nonMethodKeywords = new Set(['if', 'for', 'while', 'switch', 'catch', 'with', 'function']);
    const methodRegex = /(?:\b(async)\s+)?(\w+)\s*\(([^)]*)\)\s*{/g;
    const asyncMethods = [];
    const regularMethods = [];
    let match;

    while ((match = methodRegex.exec(content)) !== null) {
      const name = match[2];
      if (nonMethodKeywords.has(name)) {
        continue;
      }

      const isAsync = match[1] !== undefined;
      const entry = {
        name,
        parameters: match[3].split(',').map(p => p.trim()).filter(p => p),
        type: isAsync ? 'async' : 'regular',
        line: this.getLineNumber(content, match.index),
        complexity: this.estimateMethodComplexity(content, match.index)
      };

      (isAsync ? asyncMethods : regularMethods).push(entry);
    }

    // Preserve the established ordering: async methods before regular ones.
    return [...asyncMethods, ...regularMethods];
  }

  /**
   * Extracts `export const NAME = { ... }` objects.
   *
   * The object body is matched lazily up to the first `}`, so nested objects
   * are captured only partially. Objects whose parsing throws are logged with
   * `console.warn` and skipped.
   *
   * @param {string} content - Data module source text.
   * @returns {Array<{name: string, properties: *, line: number, type_analysis: *}>}
   */
  extractDataObjects(content) {
    const objects = [];
    const exportRegex = /export\s+const\s+(\w+)\s*=\s*({[\s\S]*?});?/g;
    let match;

    while ((match = exportRegex.exec(content)) !== null) {
      try {
        const name = match[1];
        const objectContent = match[2];
        const properties = this.parseObjectProperties(objectContent);

        objects.push({
          name,
          properties,
          line: this.getLineNumber(content, match.index),
          type_analysis: this.analyzeDataTypes(properties)
        });
      } catch (error) {
        console.warn(`Failed to parse data object: ${error.message}`);
      }
    }

    return objects;
  }

  // Analysis and calculation methods

  /**
   * Counts steps per Gherkin keyword.
   *
   * @param {Array<{type: string}>} steps - Steps as returned by `extractGherkinSteps`.
   * @returns {Object<string, number>} Counts keyed by keyword; the five
   *   standard keywords are always present.
   */
  analyzeStepDistribution(steps) {
    const distribution = { Given: 0, When: 0, Then: 0, And: 0, But: 0 };

    steps.forEach(step => {
      distribution[step.type] = (distribution[step.type] || 0) + 1;
    });

    return distribution;
  }

  /**
   * Derives per-scenario ratios and a capped overall score from extracted
   * Gherkin patterns.
   *
   * @param {{steps: Array, scenarios: Array, tables: Array}} patterns
   * @returns {{step_complexity: number, outline_ratio: number, table_usage: number, overall_score: number}}
   *   `overall_score` is capped at 1.
   */
  calculateGherkinComplexity(patterns) {
    const totalSteps = patterns.steps.length;
    const totalScenarios = patterns.scenarios.length;
    const outlineCount = patterns.scenarios.filter(s => s.type === 'outline').length;
    const tableCount = patterns.tables.length;
    // Avoid division by zero when no scenarios were found.
    const scenarioDivisor = Math.max(totalScenarios, 1);

    return {
      step_complexity: totalSteps / scenarioDivisor,
      outline_ratio: outlineCount / scenarioDivisor,
      table_usage: tableCount / scenarioDivisor,
      overall_score: Math.min((totalSteps + outlineCount * 2 + tableCount) / 10, 1)
    };
  }

  /**
   * Measures how often step texts repeat (case-insensitively).
   *
   * @param {{steps: Array<{text: string}>}} patterns
   * @returns {{reuse_ratio: number, unique_steps: number, total_steps: number, score: number}}
   */
  calculateGherkinReusability(patterns) {
    const stepTexts = patterns.steps.map(s => s.text.toLowerCase());
    const uniqueSteps = new Set(stepTexts);
    const duplicateCount = stepTexts.length - uniqueSteps.size;

    return {
      reuse_ratio: duplicateCount / Math.max(stepTexts.length, 1),
      unique_steps: uniqueSteps.size,
      total_steps: stepTexts.length,
      score: duplicateCount > 0 ? duplicateCount / stepTexts.length : 0
    };
  }

  /**
   * Summarises regex complexity across step definitions.
   *
   * @param {Array<{regex_complexity?: number, pattern?: string}>} stepDefinitions
   * @returns {{average: number, max: number, min: number, distribution: *}}
   *   `max` and `min` are 0 when there are no definitions.
   */
  analyzeStepComplexity(stepDefinitions) {
    const complexities = stepDefinitions.map(step =>
      step.regex_complexity || this.calculateRegexComplexity(step.pattern || '')
    );
    const hasValues = complexities.length > 0;
    const total = complexities.reduce((sum, value) => sum + value, 0);

    return {
      average: total / Math.max(complexities.length, 1),
      max: hasValues ? Math.max(...complexities) : 0,
      // Computed over the values only; mixing in a literal 0 would pin the
      // minimum at 0 for every non-negative input.
      min: hasValues ? Math.min(...complexities) : 0,
      distribution: this.categorizeComplexities(complexities)
    };
  }

  /**
   * Scores a regex source string by counting special characters, groups and
   * quantifiers.
   *
   * @param {string} pattern - Regex source text.
   * @returns {number} Score, capped at 1.
   */
  calculateRegexComplexity(pattern) {
    const specialChars = (pattern.match(/[.*+?^${}()|[\]\\]/g) || []).length;
    const groups = (pattern.match(/\(/g) || []).length;
    const quantifiers = (pattern.match(/[*+?{]/g) || []).length;

    return Math.min((specialChars + groups * 2 + quantifiers) / 10, 1);
  }

  // Utility methods

  /**
   * Converts a character offset into a 1-based line number.
   *
   * @param {string} content - Full text.
   * @param {number} index - Character offset into `content`.
   * @returns {number} 1-based line number containing `index`.
   */
  getLineNumber(content, index) {
    return content.substring(0, index).split('\n').length;
  }

  /**
   * Returns the text surrounding a character offset.
   *
   * @param {string} content - Full text.
   * @param {number} index - Character offset into `content`.
   * @param {number} [contextSize=50] - Characters to include on each side.
   * @returns {string} Slice of `content` around `index`, clamped to its bounds.
   */
  getContext(content, index, contextSize = 50) {
    const start = Math.max(0, index - contextSize);
    const end = Math.min(content.length, index + contextSize);
    return content.substring(start, end);
  }

  /**
   * Builds the cache key for an extraction request.
   *
   * The key is a SHA-256 digest over the file type and the complete content,
   * so inputs that merely share a prefix, or the same content requested under
   * different file types, get distinct keys.
   *
   * @param {string} content - Raw text being analysed.
   * @param {string} fileType - Requested file type (including 'auto').
   * @returns {string} Hex-encoded digest.
   */
  generateCacheKey(content, fileType) {
    return crypto
      .createHash('sha256')
      .update(String(fileType))
      .update('\u0000') // separator so the type/content boundary is unambiguous
      .update(content)
      .digest('hex');
  }

  /**
   * Combines line count, brace nesting and a branch-count heuristic into a
   * complexity summary.
   *
   * @param {string} content - Raw text.
   * @returns {{lines: number, nesting_level: number, cyclomatic: number, overall: number}}
   *   `overall` is capped at 1.
   */
  calculateContentComplexity(content) {
    const lines = content.split('\n').length;
    const nestingLevel = this.calculateMaxNesting(content);
    const cyclomaticComplexity = this.calculateCyclomaticComplexity(content);

    return {
      lines,
      nesting_level: nestingLevel,
      cyclomatic: cyclomaticComplexity,
      overall: Math.min((lines / 1000 + nestingLevel / 10 + cyclomaticComplexity / 20), 1)
    };
  }

  /**
   * Returns the deepest `{`/`}` nesting depth reached while scanning the text.
   * Braces inside strings or comments are counted as well.
   *
   * @param {string} content - Raw text.
   * @returns {number} Maximum depth (0 when there are no opening braces).
   */
  calculateMaxNesting(content) {
    let maxNesting = 0;
    let currentNesting = 0;

    for (const char of content) {
      if (char === '{') {
        currentNesting++;
        maxNesting = Math.max(maxNesting, currentNesting);
      } else if (char === '}') {
        currentNesting--;
      }
    }

    return maxNesting;
  }

  /**
   * Approximates cyclomatic complexity by counting textual occurrences of
   * branching constructs.
   *
   * @param {string} content - Raw text.
   * @returns {number} 1 plus the number of matches across all patterns.
   */
  calculateCyclomaticComplexity(content) {
    const complexityPatterns = [
      /if\s*\(/g,
      /for\s*\(/g,
      /while\s*\(/g,
      /catch\s*\(/g,
      /\?\s*.*\s*:/g, // Ternary operator
      /&&/g,
      /\|\|/g
    ];

    let complexity = 1; // Base complexity

    complexityPatterns.forEach(pattern => {
      const matches = content.match(pattern);
      if (matches) {
        complexity += matches.length;
      }
    });

    return complexity;
  }

  /** Removes every cached extraction result. */
  clearCache() {
    this.patternCache.clear();
  }

  /**
   * Reports the current cache contents.
   *
   * @returns {{size: number, keys: string[]}} Entry count and the cache keys.
   */
  getCacheStats() {
    return {
      size: this.patternCache.size,
      keys: Array.from(this.patternCache.keys())
    };
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/raymondsambur/automation-script-generator'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.