Skip to main content
Glama
semantic-duplicate-analyzer.ts • 22.4 kB
import { ZebrunnerTestCase } from '../types/core.js';
import {
  TestCaseDuplicateAnalyzer,
  DuplicateAnalysisResult,
  TestCaseStep,
  SimilarityScore,
  TestCaseCluster
} from './duplicate-analyzer.js';

/** Tuning options for the step-cluster based ("semantic") duplicate analysis. */
export interface SemanticAnalysisOptions {
  stepClusteringThreshold: number; // 0.80-0.90 for step similarity
  testCaseClusteringThreshold: number; // 0.80-0.90 for test case similarity
  useStepClustering: boolean; // Two-phase clustering
  useMedoidSelection: boolean; // Better representative selection
  includeSemanticPatterns: boolean; // Enhanced pattern detection
}

/** A group of test steps considered to express the same action. */
export interface StepCluster {
  id: string;
  representativeStep: string;
  steps: TestCaseStep[];
  semanticSummary: string;
  /** Number of step occurrences collected in `steps`. */
  frequency: number;
}

/** Pairwise similarity enriched with the cluster-space metrics computed by this analyzer. */
export interface SemanticSimilarityScore extends SimilarityScore {
  /** Jaccard similarity of the two step-cluster sets, as a rounded percentage. */
  stepClusterOverlap: number;
  /** Cosine similarity of the two step-cluster count vectors, as a rounded percentage. */
  semanticConfidence: number;
  /** Weighted Jaccard/cosine combination, as a rounded percentage. */
  clusterBasedSimilarity: number;
}

/** A cluster of similar test cases plus the semantic metadata added by this analyzer. */
export interface SemanticTestCaseCluster extends TestCaseCluster {
  stepClusters: string[];
  semanticCoherence: number;
  medoidTestCase: string;
  clusterType: 'exact_match' | 'semantic_similar' | 'pattern_based' | 'hybrid';
}

/** Result of {@link SemanticDuplicateAnalyzer.analyzeSemanticDuplicates}. */
export interface SemanticDuplicateAnalysisResult extends DuplicateAnalysisResult {
  stepClusters: StepCluster[];
  semanticClusters: SemanticTestCaseCluster[];
  analysisMode: 'basic' | 'semantic' | 'hybrid';
  semanticInsights: {
    commonStepPatterns: string[];
    discoveredWorkflows: string[];
    automationOpportunities: string[];
  };
}

/** A test step annotated with the key of the test case it was extracted from. */
type StepWithOrigin = TestCaseStep & { testCaseId: string };

/** Above this many distinct step texts the LLM prompt is skipped and text-similarity clustering is used. */
const MAX_UNIQUE_STEPS_FOR_LLM = 100;

/** Returns the test case key, or a synthetic `tc-<id>` key when the test case has no key. */
function testCaseKeyOf(testCase: ZebrunnerTestCase): string {
  return testCase.key || `tc-${testCase.id}`;
}

/** Narrows an unknown value to a non-null object that can be probed by property name. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/** Keeps only the string entries of `value`; anything that is not an array yields `[]`. */
function toStringArray(value: unknown): string[] {
  return Array.isArray(value)
    ? value.filter((item): item is string => typeof item === 'string')
    : [];
}

/**
 * Parses an LLM response as JSON.
 *
 * The raw response is tried first, so every response that parsed before still
 * parses the same way. Only if that fails is the body of the first Markdown
 * code fence (``` or ```json) tried, because chat models frequently wrap JSON
 * answers in a fence.
 *
 * @throws SyntaxError when neither the raw text nor a fenced body is valid JSON.
 */
function parseLlmJson(response: string): unknown {
  try {
    return JSON.parse(response);
  } catch (directError) {
    const fencedBody = /```(?:json)?\s*([\s\S]*?)```/i.exec(response)?.[1];
    if (!fencedBody) throw directError;
    return JSON.parse(fencedBody);
  }
}

/**
 * Duplicate analyzer that compares test cases in "step-cluster space":
 * steps are first grouped into clusters (optionally by an LLM), every test case
 * becomes a vector of per-cluster step counts, and test cases are compared with
 * a weighted Jaccard/cosine score on those vectors.
 */
export class SemanticDuplicateAnalyzer extends TestCaseDuplicateAnalyzer {
  private readonly options: SemanticAnalysisOptions;

  /**
   * @param similarityThreshold Forwarded unchanged to the base analyzer.
   * @param options Overrides for the defaults (0.85 step threshold, 0.80 test
   *   case threshold, all boolean switches on). A key that is absent or
   *   `undefined` keeps its default; `false` and `0` are honoured.
   */
  constructor(similarityThreshold: number = 80, options: Partial<SemanticAnalysisOptions> = {}) {
    super(similarityThreshold);
    // `??` rather than `||` so that explicit `false`/`0` survive, and no trailing
    // spread so that an explicitly-undefined key cannot clobber a default.
    this.options = {
      stepClusteringThreshold: options.stepClusteringThreshold ?? 0.85,
      testCaseClusteringThreshold: options.testCaseClusteringThreshold ?? 0.80,
      useStepClustering: options.useStepClustering ?? true,
      useMedoidSelection: options.useMedoidSelection ?? true,
      includeSemanticPatterns: options.includeSemanticPatterns ?? true
    };
  }

  /**
   * Enhanced semantic analysis using LLM-powered step clustering.
   *
   * @param testCases Test cases to compare with each other.
   * @param projectKey Copied into the result.
   * @param suiteId Copied into the result.
   * @param llmAnalysisFunction Optional callback that sends a prompt to an LLM
   *   and resolves with its text answer. Without it, step clustering and
   *   insights fall back to text-similarity heuristics.
   * @returns Step clusters, test case clusters, pairwise similarities, savings
   *   estimate and insights.
   */
  async analyzeSemanticDuplicates(
    testCases: ZebrunnerTestCase[],
    projectKey: string,
    suiteId?: number,
    llmAnalysisFunction?: (prompt: string) => Promise<string>
  ): Promise<SemanticDuplicateAnalysisResult> {
    console.log(`🧠 Starting semantic analysis of ${testCases.length} test cases...`);

    // Phase 1: Extract and normalize all steps
    const allSteps = this.extractAllSteps(testCases);
    console.log(`📝 Extracted ${allSteps.length} total steps`);

    // Phase 2: Cluster similar steps (using LLM if available)
    const stepClusters = this.options.useStepClustering
      ? await this.clusterStepsSemantically(allSteps, llmAnalysisFunction)
      : this.clusterStepsBasic(allSteps);
    console.log(`🗂️ Created ${stepClusters.length} step clusters`);

    // Phase 3: Represent test cases in step-cluster space
    const testCaseVectors = this.createTestCaseVectors(testCases, stepClusters);

    // Phase 4: Compute semantic similarities
    const semanticSimilarities = this.computeSemanticSimilarities(testCases, testCaseVectors, stepClusters);

    // Phase 5: Create semantic clusters
    const semanticClusters = this.createSemanticClusters(testCases, semanticSimilarities, stepClusters);

    // Phase 6: Generate insights
    const semanticInsights = await this.generateSemanticInsights(
      stepClusters,
      semanticClusters,
      llmAnalysisFunction
    );

    // Potential savings: every cluster member beyond the first counts as a duplicate.
    const duplicateTestCases = semanticClusters.reduce(
      (sum, cluster) => sum + cluster.testCases.length - 1,
      0
    );
    const estimatedTimeReduction = this.calculateTimeReduction(duplicateTestCases, testCases.length);

    return {
      suiteId,
      projectKey,
      totalTestCases: testCases.length,
      clustersFound: semanticClusters.length,
      potentialSavings: {
        duplicateTestCases,
        estimatedTimeReduction
      },
      clusters: semanticClusters,
      stepClusters,
      semanticClusters,
      analysisMode: llmAnalysisFunction ? 'semantic' : 'hybrid',
      semanticInsights,
      similarityMatrix: semanticSimilarities
    };
  }

  /**
   * Extract all steps from all test cases, tagging each step with the key of
   * the test case it came from.
   */
  private extractAllSteps(testCases: ZebrunnerTestCase[]): StepWithOrigin[] {
    return testCases.flatMap(testCase => {
      const testCaseId = testCaseKeyOf(testCase);
      return this.extractSteps(testCase).map(step => ({ ...step, testCaseId }));
    });
  }

  /**
   * Cluster steps using LLM semantic understanding.
   *
   * Falls back to {@link clusterStepsBasic} when no LLM callback is given, when
   * there are more than {@link MAX_UNIQUE_STEPS_FOR_LLM} distinct step texts, or
   * when the LLM call fails or returns something that is not the requested JSON.
   */
  private async clusterStepsSemantically(
    allSteps: StepWithOrigin[],
    llmAnalysisFunction?: (prompt: string) => Promise<string>
  ): Promise<StepCluster[]> {
    if (!llmAnalysisFunction) {
      return this.clusterStepsBasic(allSteps);
    }

    // Only distinct step texts are sent to the LLM.
    const uniqueSteps = [...new Set(allSteps.map(step => step.action))];

    if (uniqueSteps.length > MAX_UNIQUE_STEPS_FOR_LLM) {
      console.log(`⚠️ Large number of unique steps (${uniqueSteps.length}), using basic clustering`);
      return this.clusterStepsBasic(allSteps);
    }

    const prompt = [
      '',
      'Analyze these test steps and group them into semantic clusters. Steps that perform the same logical action should be in the same cluster, even if worded differently.',
      '',
      'Steps to cluster:',
      uniqueSteps.map((step, i) => `${i + 1}. ${step}`).join('\n'),
      '',
      'Please respond with JSON in this format:',
      '{',
      '  "clusters": [',
      '    {',
      '      "id": "cluster_1",',
      '      "representative": "most common/clear step text",',
      '      "summary": "what this cluster represents",',
      '      "steps": ["step text 1", "step text 2", ...]',
      '    }',
      '  ]',
      '}',
      '',
      'Focus on grouping steps that:',
      '- Perform the same action (click, navigate, verify, etc.)',
      '- Target the same UI element or functionality',
      '- Have the same logical intent',
      '',
      `Threshold: Group steps that are at least ${Math.round(this.options.stepClusteringThreshold * 100)}% semantically similar.`,
      ''
    ].join('\n');

    try {
      const response = await llmAnalysisFunction(prompt);
      const parsed = parseLlmJson(response);
      const rawClusters = isRecord(parsed) ? parsed['clusters'] : undefined;
      if (!Array.isArray(rawClusters)) {
        throw new Error('LLM response does not contain a "clusters" array');
      }

      // Index every occurrence by its action text so that a cluster's frequency
      // reflects how often its steps really occur, not how many texts it lists.
      const occurrencesByAction = new Map<string, StepWithOrigin[]>();
      for (const step of allSteps) {
        const bucket = occurrencesByAction.get(step.action);
        if (bucket) {
          bucket.push(step);
        } else {
          occurrencesByAction.set(step.action, [step]);
        }
      }

      return rawClusters.map((rawCluster: unknown, index: number): StepCluster => {
        const rawSteps = isRecord(rawCluster) ? rawCluster['steps'] : undefined;
        if (!isRecord(rawCluster) || !Array.isArray(rawSteps)) {
          throw new Error(`LLM cluster #${index + 1} has no "steps" array`);
        }

        const stepTexts = toStringArray(rawSteps);
        const steps = stepTexts.flatMap((stepText): StepWithOrigin[] => {
          const occurrences = occurrencesByAction.get(stepText);
          if (occurrences) return occurrences;
          // The LLM returned a text that matches no extracted step verbatim.
          // Keep a placeholder so the text still takes part in similarity
          // matching; only these three fields are known here, hence the assertion.
          return [{ action: stepText, stepNumber: 0, testCaseId: 'unknown' } as StepWithOrigin];
        });

        const rawId = rawCluster['id'];
        const rawRepresentative = rawCluster['representative'];
        const rawSummary = rawCluster['summary'];

        return {
          id: typeof rawId === 'string' && rawId ? rawId : `cluster_${index + 1}`,
          representativeStep:
            typeof rawRepresentative === 'string' ? rawRepresentative : stepTexts[0] ?? '',
          steps,
          semanticSummary: typeof rawSummary === 'string' ? rawSummary : '',
          frequency: steps.length
        };
      });
    } catch (error) {
      console.log(`⚠️ LLM clustering failed, falling back to basic clustering: ${error}`);
      return this.clusterStepsBasic(allSteps);
    }
  }

  /**
   * Basic step clustering using text similarity (fallback).
   *
   * Greedy single pass: the first unassigned step seeds a cluster and absorbs
   * every later unassigned step whose text similarity to the seed reaches
   * `stepClusteringThreshold`. Assignment is tracked per step occurrence, so a
   * step repeated N times contributes N to `frequency`.
   *
   * @returns Clusters sorted by descending frequency.
   */
  private clusterStepsBasic(allSteps: StepWithOrigin[]): StepCluster[] {
    const clusters: StepCluster[] = [];
    const assigned = new Set<number>();
    // NOTE(review): assumes calculateTextSimilarity returns a 0..1 ratio, since
    // it is compared against the 0..1 option threshold; confirm in the base class.
    const threshold = this.options.stepClusteringThreshold;

    allSteps.forEach((seed, seedIndex) => {
      if (assigned.has(seedIndex)) return;
      assigned.add(seedIndex);

      const members: StepWithOrigin[] = [seed];
      // Identical texts give identical answers; compute each one once per seed.
      const matchesSeed = new Map<string, boolean>();

      allSteps.forEach((candidate, candidateIndex) => {
        if (candidateIndex <= seedIndex || assigned.has(candidateIndex)) return;

        let isMatch = matchesSeed.get(candidate.action);
        if (isMatch === undefined) {
          isMatch = this.calculateTextSimilarity(seed.action, candidate.action) >= threshold;
          matchesSeed.set(candidate.action, isMatch);
        }

        if (isMatch) {
          members.push(candidate);
          assigned.add(candidateIndex);
        }
      });

      clusters.push({
        id: `cluster_${clusters.length + 1}`,
        representativeStep: seed.action,
        steps: members,
        semanticSummary: `Basic cluster for: ${seed.action}`,
        frequency: members.length
      });
    });

    return clusters.sort((a, b) => b.frequency - a.frequency);
  }

  /**
   * Create test case vectors in step-cluster space.
   *
   * @returns Map from test case key to a map of step-cluster id → number of the
   *   test case's steps assigned to that cluster. Every cluster id is present
   *   (0 when unused). A step that matches no cluster is not counted.
   */
  private createTestCaseVectors(
    testCases: ZebrunnerTestCase[],
    stepClusters: StepCluster[]
  ): Map<string, Map<string, number>> {
    const vectors = new Map<string, Map<string, number>>();
    const threshold = this.options.stepClusteringThreshold;

    // The owning cluster depends only on the step text, so resolve each text once.
    const clusterIdByAction = new Map<string, string | null>();
    const resolveClusterId = (action: string): string | null => {
      const cached = clusterIdByAction.get(action);
      if (cached !== undefined) return cached;

      // First cluster holding at least one sufficiently similar step wins.
      const owner = stepClusters.find(cluster =>
        cluster.steps.some(
          member => this.calculateTextSimilarity(action, member.action) >= threshold
        )
      );
      const clusterId = owner ? owner.id : null;
      clusterIdByAction.set(action, clusterId);
      return clusterId;
    };

    for (const testCase of testCases) {
      const vector = new Map<string, number>();

      // Initialize vector with all clusters
      for (const cluster of stepClusters) {
        vector.set(cluster.id, 0);
      }

      // Count step cluster occurrences
      for (const step of this.extractSteps(testCase)) {
        const clusterId = resolveClusterId(step.action);
        if (clusterId !== null) {
          vector.set(clusterId, (vector.get(clusterId) || 0) + 1);
        }
      }

      vectors.set(testCaseKeyOf(testCase), vector);
    }

    return vectors;
  }

  /**
   * Compute semantic similarities using step-cluster vectors.
   *
   * Each unordered pair is scored as `0.6 * Jaccard + 0.4 * cosine`; pairs
   * scoring below `testCaseClusteringThreshold` are omitted from the result.
   *
   * @returns Retained pairs sorted by descending `similarityPercentage`.
   */
  private computeSemanticSimilarities(
    testCases: ZebrunnerTestCase[],
    testCaseVectors: Map<string, Map<string, number>>,
    stepClusters: StepCluster[]
  ): SemanticSimilarityScore[] {
    const similarities: SemanticSimilarityScore[] = [];

    for (let i = 0; i < testCases.length; i++) {
      for (let j = i + 1; j < testCases.length; j++) {
        const testCase1 = testCases[i];
        const testCase2 = testCases[j];

        const key1 = testCaseKeyOf(testCase1);
        const key2 = testCaseKeyOf(testCase2);

        const vector1 = testCaseVectors.get(key1);
        const vector2 = testCaseVectors.get(key2);
        if (!vector1 || !vector2) continue;

        // Jaccard looks at which clusters are used, cosine at how often.
        const jaccardSimilarity = this.calculateJaccardSimilarity(vector1, vector2);
        const cosineSimilarity = this.calculateCosineSimilarity(vector1, vector2);

        // Combined similarity score
        const combinedSimilarity = jaccardSimilarity * 0.6 + cosineSimilarity * 0.4;
        if (combinedSimilarity < this.options.testCaseClusteringThreshold) continue;

        // Get original similarity data for compatibility
        const originalSteps1 = this.extractSteps(testCase1);
        const originalSteps2 = this.extractSteps(testCase2);
        const originalSimilarity = this.calculateStepSimilarity(originalSteps1, originalSteps2);

        // Detect pattern
        const pattern = this.detectDuplicationPattern(testCase1, testCase2);

        similarities.push({
          testCase1Key: key1,
          testCase2Key: key2,
          // Percentage rounded to two decimals.
          similarityPercentage: Math.round(combinedSimilarity * 100 * 100) / 100,
          sharedSteps: originalSimilarity.sharedSteps,
          totalSteps1: originalSteps1.length,
          totalSteps2: originalSteps2.length,
          sharedStepsSummary: originalSimilarity.sharedStepsSummary,
          patternType: pattern.patternType,
          variationDetails: pattern.variationDetails,
          stepClusterOverlap: Math.round(jaccardSimilarity * 100),
          semanticConfidence: Math.round(cosineSimilarity * 100),
          clusterBasedSimilarity: Math.round(combinedSimilarity * 100)
        });
      }
    }

    return similarities.sort((a, b) => b.similarityPercentage - a.similarityPercentage);
  }

  /**
   * Calculate Jaccard similarity for step cluster vectors.
   *
   * Only clusters with a positive count are treated as present.
   *
   * @returns |intersection| / |union| in [0, 1]; 0 when both vectors are empty.
   */
  private calculateJaccardSimilarity(vector1: Map<string, number>, vector2: Map<string, number>): number {
    const activeKeys = (vector: Map<string, number>): Set<string> =>
      new Set([...vector.keys()].filter(key => (vector.get(key) || 0) > 0));

    const keys1 = activeKeys(vector1);
    const keys2 = activeKeys(vector2);

    const unionSize = new Set([...keys1, ...keys2]).size;
    if (unionSize === 0) return 0;

    const intersectionSize = [...keys1].filter(key => keys2.has(key)).length;
    return intersectionSize / unionSize;
  }

  /**
   * Calculate cosine similarity for step cluster count vectors.
   *
   * Keys missing from one vector count as 0.
   *
   * @returns dot(v1, v2) / (|v1| * |v2|); 0 when either vector has zero length.
   */
  private calculateCosineSimilarity(vector1: Map<string, number>, vector2: Map<string, number>): number {
    let dotProduct = 0;
    let norm1 = 0;
    let norm2 = 0;

    // Get all unique keys
    const allKeys = new Set([...vector1.keys(), ...vector2.keys()]);

    for (const key of allKeys) {
      const val1 = vector1.get(key) || 0;
      const val2 = vector2.get(key) || 0;

      dotProduct += val1 * val2;
      norm1 += val1 * val1;
      norm2 += val2 * val2;
    }

    const denominator = Math.sqrt(norm1) * Math.sqrt(norm2);
    return denominator === 0 ? 0 : dotProduct / denominator;
  }

  /**
   * Create semantic clusters with enhanced analysis.
   *
   * Walks the pairs in the given order (callers pass them sorted by descending
   * similarity). A pair of two unclustered test cases starts a new cluster; a
   * pair with exactly one clustered member adds the other member to that
   * cluster; a pair whose members are both clustered is skipped, so existing
   * clusters are never merged.
   */
  private createSemanticClusters(
    testCases: ZebrunnerTestCase[],
    similarities: SemanticSimilarityScore[],
    stepClusters: StepCluster[]
  ): SemanticTestCaseCluster[] {
    const clusters: SemanticTestCaseCluster[] = [];
    const processed = new Set<string>();
    const testCaseMap = new Map<string, ZebrunnerTestCase>(
      testCases.map((tc): [string, ZebrunnerTestCase] => [testCaseKeyOf(tc), tc])
    );

    // Summary record stored for each cluster member.
    const toMember = (testCase: ZebrunnerTestCase) => ({
      key: testCaseKeyOf(testCase),
      id: testCase.id,
      title: testCase.title || '',
      automationState: testCase.automationState?.name || 'Unknown',
      stepCount: this.extractSteps(testCase).length
    });

    for (const similarity of similarities) {
      const key1 = similarity.testCase1Key;
      const key2 = similarity.testCase2Key;

      if (processed.has(key1) && processed.has(key2)) continue;

      // Find existing cluster
      const existingCluster = clusters.find(cluster =>
        cluster.testCases.some(tc => tc.key === key1 || tc.key === key2)
      );

      if (existingCluster) {
        // Add unprocessed test case
        const unprocessedKey = processed.has(key1) ? key2 : key1;
        const testCase = testCaseMap.get(unprocessedKey);

        if (testCase && !existingCluster.testCases.some(tc => tc.key === unprocessedKey)) {
          existingCluster.testCases.push(toMember(testCase));
          processed.add(unprocessedKey);
        }
        continue;
      }

      // Create new cluster
      const testCase1 = testCaseMap.get(key1);
      const testCase2 = testCaseMap.get(key2);
      if (!testCase1 || !testCase2) continue;

      clusters.push({
        clusterId: `semantic_cluster_${clusters.length + 1}`,
        testCases: [toMember(testCase1), toMember(testCase2)],
        // Seed values; averageSimilarity is recomputed when the cluster is finalized.
        averageSimilarity: similarity.similarityPercentage,
        sharedLogicSummary: similarity.sharedStepsSummary.join('; '),
        automationMix: { manual: 0, automated: 0, mixed: 0 },
        recommendedBase: { testCaseKey: '', reason: '' },
        mergingStrategy: '',
        stepClusters: [],
        semanticCoherence: similarity.semanticConfidence,
        medoidTestCase: '',
        clusterType: similarity.clusterBasedSimilarity > 90 ? 'exact_match' : 'semantic_similar'
      });

      processed.add(key1);
      processed.add(key2);
    }

    // Finalize clusters
    return clusters.map(cluster => this.finalizeSemanticCluster(cluster, similarities, stepClusters));
  }

  /**
   * Finalize semantic cluster with enhanced analysis.
   *
   * Fills in the automation mix, the average similarity over all retained
   * intra-cluster pairs, the medoid / recommended base test case and the
   * merging strategy. The cluster is updated in place and returned.
   */
  private finalizeSemanticCluster(
    cluster: SemanticTestCaseCluster,
    similarities: SemanticSimilarityScore[],
    stepClusters: StepCluster[]
  ): SemanticTestCaseCluster {
    // Calculate automation mix
    const automationCounts = { manual: 0, automated: 0, mixed: 0 };
    for (const tc of cluster.testCases) {
      const state = tc.automationState.toLowerCase();
      // "not automated" must be tested before "automated", which it contains.
      if (state.includes('manual') || state.includes('not automated')) {
        automationCounts.manual++;
      } else if (state.includes('automated')) {
        automationCounts.automated++;
      } else {
        automationCounts.mixed++;
      }
    }
    cluster.automationMix = automationCounts;

    // Pairs whose two members both belong to this cluster.
    const clusterKeys = new Set(cluster.testCases.map(tc => tc.key));
    const clusterSimilarities = similarities.filter(
      s => clusterKeys.has(s.testCase1Key) && clusterKeys.has(s.testCase2Key)
    );

    // The value set at creation is only the seed pair's score; replace it with
    // the mean over every retained intra-cluster pair (two-decimal percentage).
    if (clusterSimilarities.length > 0) {
      const total = clusterSimilarities.reduce((sum, s) => sum + s.similarityPercentage, 0);
      cluster.averageSimilarity = Math.round((total / clusterSimilarities.length) * 100) / 100;
    }

    // Find medoid (most central test case)
    if (this.options.useMedoidSelection) {
      cluster.medoidTestCase = this.findMedoid(cluster, similarities);
      cluster.recommendedBase = {
        testCaseKey: cluster.medoidTestCase,
        reason: 'Medoid test case - most representative of the cluster'
      };
    } else {
      // Use existing logic
      const recommendedBase = this.determineBaseTestCase(cluster);
      cluster.recommendedBase = recommendedBase;
      cluster.medoidTestCase = recommendedBase.testCaseKey;
    }

    // Determine pattern type and strategy
    const patternType = clusterSimilarities.length > 0 ? clusterSimilarities[0].patternType : 'other';
    cluster.mergingStrategy = this.suggestMergingStrategy(cluster, patternType);

    return cluster;
  }

  /**
   * Find medoid (most central) test case in cluster.
   *
   * Distance between two members is `100 - similarityPercentage`. Pairs that
   * are absent from `similarities` are skipped, so a member's average is taken
   * over its known pairs only; a member with no known pair gets `Infinity`.
   *
   * @returns Key of the member with the smallest average distance; on ties, and
   *   when no member has a known pair, the earliest member wins.
   */
  private findMedoid(cluster: SemanticTestCaseCluster, similarities: SemanticSimilarityScore[]): string {
    const testCaseKeys = cluster.testCases.map(tc => tc.key);
    let bestMedoid = testCaseKeys[0] ?? '';
    let minAvgDistance = Infinity;

    for (const candidateKey of testCaseKeys) {
      let totalDistance = 0;
      let comparisons = 0;

      for (const otherKey of testCaseKeys) {
        if (candidateKey === otherKey) continue;

        // Pairs are stored once, in either orientation.
        const similarity = similarities.find(
          s =>
            (s.testCase1Key === candidateKey && s.testCase2Key === otherKey) ||
            (s.testCase1Key === otherKey && s.testCase2Key === candidateKey)
        );

        if (similarity) {
          totalDistance += 100 - similarity.similarityPercentage;
          comparisons++;
        }
      }

      const avgDistance = comparisons > 0 ? totalDistance / comparisons : Infinity;
      if (avgDistance < minAvgDistance) {
        minAvgDistance = avgDistance;
        bestMedoid = candidateKey;
      }
    }

    return bestMedoid;
  }

  /**
   * Generate semantic insights using LLM analysis.
   *
   * Heuristic insights are always computed. When an LLM callback is supplied
   * and at least one step cluster exists, the LLM's workflows are added, its
   * automation opportunities are appended to the heuristic ones, and its step
   * patterns replace the heuristic ones if it returned an array. An LLM or
   * parsing failure is logged and leaves the heuristic insights untouched.
   */
  private async generateSemanticInsights(
    stepClusters: StepCluster[],
    semanticClusters: SemanticTestCaseCluster[],
    llmAnalysisFunction?: (prompt: string) => Promise<string>
  ): Promise<{
    commonStepPatterns: string[];
    discoveredWorkflows: string[];
    automationOpportunities: string[];
  }> {
    const insights: {
      commonStepPatterns: string[];
      discoveredWorkflows: string[];
      automationOpportunities: string[];
    } = {
      commonStepPatterns: [],
      discoveredWorkflows: [],
      automationOpportunities: []
    };

    // Basic insights without LLM: the ten most frequent clusters seen 3+ times.
    // Sorted explicitly because LLM-produced clusters arrive in arbitrary order.
    insights.commonStepPatterns = stepClusters
      .filter(c => c.frequency >= 3)
      .sort((a, b) => b.frequency - a.frequency)
      .slice(0, 10)
      .map(c => c.semanticSummary);

    // Clusters with several manual members and no automated one.
    insights.automationOpportunities = semanticClusters
      .filter(c => c.automationMix.manual > 1 && c.automationMix.automated === 0)
      .map(c => `Cluster ${c.clusterId}: ${c.testCases.length} manual tests ready for automation`);

    // Enhanced insights with LLM
    if (llmAnalysisFunction && stepClusters.length > 0) {
      try {
        const prompt = [
          '',
          'Analyze these test step clusters and provide insights:',
          '',
          'Step Clusters:',
          stepClusters
            .slice(0, 15)
            .map(c => `- ${c.representativeStep} (appears ${c.frequency} times): ${c.semanticSummary}`)
            .join('\n'),
          '',
          'Test Case Clusters:',
          semanticClusters
            .slice(0, 10)
            .map(c => `- Cluster ${c.clusterId}: ${c.testCases.length} test cases, ${c.averageSimilarity}% similarity`)
            .join('\n'),
          '',
          'Please provide insights in JSON format:',
          '{',
          '  "discoveredWorkflows": ["workflow pattern 1", "workflow pattern 2", ...],',
          '  "automationOpportunities": ["opportunity 1", "opportunity 2", ...],',
          '  "commonStepPatterns": ["pattern 1", "pattern 2", ...]',
          '}',
          '',
          'Focus on identifying:',
          '1. Common user workflows that appear across multiple test cases',
          '2. Opportunities for test automation and parameterization',
          '3. Recurring step patterns that could be optimized',
          ''
        ].join('\n');

        const response = await llmAnalysisFunction(prompt);
        const parsed = parseLlmJson(response);
        if (!isRecord(parsed)) {
          throw new Error('LLM response is not a JSON object');
        }

        insights.discoveredWorkflows = toStringArray(parsed['discoveredWorkflows']);
        insights.automationOpportunities = [
          ...insights.automationOpportunities,
          ...toStringArray(parsed['automationOpportunities'])
        ];
        // Keep the heuristic patterns unless the LLM actually supplied a list.
        const llmPatterns = parsed['commonStepPatterns'];
        if (Array.isArray(llmPatterns)) {
          insights.commonStepPatterns = toStringArray(llmPatterns);
        }
      } catch (error) {
        console.log(`⚠️ LLM insights generation failed: ${error}`);
      }
    }

    return insights;
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/maksimsarychau/mcp-zebrunner'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.