A/B Test Analysis
ab_test_analyze — Compare two campaign variants and determine the statistically significant winner (two-proportion z-test for rate metrics, sample-size heuristic for CPA/ROAS), returning the winner, confidence level, lift, and a recommendation.
Instructions
Compare two campaigns as A/B test variants and determine statistical significance. Input: campaign_id_a, campaign_id_b, primary_metric ("ctr"|"conversion_rate"|"roas"|"cpa", default "ctr"). Runs a two-proportion z-test for ctr and conversion_rate, and a sample-size/effect-size heuristic for cpa and roas. Returns {test_name, variant_a, variant_b, winner ("a"|"b"|"no_winner"), confidence_level, primary_metric, lift_percent, recommendation, sample_size_sufficient}. A winner is declared only at ≥90% confidence; treat the result as reliable only when sample_size_sufficient is true (at least 100 clicks per variant).
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| campaign_id_a | Yes | Variant A campaign | |
| campaign_id_b | Yes | Variant B campaign | |
| primary_metric | No | Metric used to compare the variants: ctr, cpa, roas, or conversion_rate | ctr |
Implementation Reference
- src/services/ab-test.ts:7-97 (handler) Main handler: the analyzeABTest function compares two campaign variants as an A/B test. It fetches the campaigns and their metrics, computes aggregate stats, calculates statistical significance via a Z-test or a heuristic, determines the winner, and returns the results with a recommendation.
/**
 * Builds the per-variant summary reported in the A/B test result from a
 * campaign name and that campaign's aggregated totals.
 */
function summarizeVariant(name: string, totals: ReturnType<typeof aggregate>) {
  return {
    name,
    impressions: totals.impressions,
    clicks: totals.clicks,
    conversions: totals.conversions,
    spend: round(totals.spend),
    ctr: calculateCTR(totals.clicks, totals.impressions),
    cpa: calculateCPA(totals.spend, totals.conversions),
    roas: calculateROAS(totals.revenue, totals.spend),
  };
}

/**
 * Compares two campaigns as the variants of an A/B test.
 *
 * Loads both campaigns and their metrics, aggregates each side, scores the
 * difference on `primaryMetric` with `calculateSignificance`, and declares a
 * winner only when that confidence reaches 90%.
 *
 * @param campaignIdA - Campaign treated as variant A.
 * @param campaignIdB - Campaign treated as variant B.
 * @param primaryMetric - Metric that decides the winner; `cpa` is the only
 *   metric where a lower value is better.
 * @param store - Storage to read from; falls back to `defaultStorage`.
 * @returns Variant summaries, winner, confidence (%), lift (%), a textual
 *   recommendation and whether both variants reached 100 clicks.
 * @throws NotFoundError when either campaign id does not resolve.
 */
export async function analyzeABTest(
  campaignIdA: string,
  campaignIdB: string,
  primaryMetric: 'ctr' | 'cpa' | 'roas' | 'conversion_rate' = 'ctr',
  store?: Storage,
): Promise<ABTestResult> {
  // Confidence (%) below which no winner is declared.
  const MIN_WINNER_CONFIDENCE = 90;
  // Clicks each variant needs before the result is reported as reliable.
  const MIN_CLICKS_PER_VARIANT = 100;

  const s = store ?? defaultStorage;

  // Campaign lookups stay sequential so a missing variant A is reported
  // before variant B is ever queried.
  const campA = await s.getCampaignById(campaignIdA);
  if (!campA) throw new NotFoundError('Campaign', campaignIdA);
  const campB = await s.getCampaignById(campaignIdB);
  if (!campB) throw new NotFoundError('Campaign', campaignIdB);

  // The two metric reads are independent of each other.
  const [metricsA, metricsB] = await Promise.all([
    s.getMetricsByCampaign(campaignIdA),
    s.getMetricsByCampaign(campaignIdB),
  ]);

  const aggA = aggregate(metricsA);
  const aggB = aggregate(metricsB);

  const variantA = summarizeVariant(campA.name, aggA);
  const variantB = summarizeVariant(campB.name, aggB);

  // Statistical significance
  const confidence = calculateSignificance(aggA, aggB, primaryMetric);

  // Determine winner
  const metricA = getMetricValue(variantA, primaryMetric);
  const metricB = getMetricValue(variantB, primaryMetric);
  const isLowerBetter = primaryMetric === 'cpa';

  let winner: 'a' | 'b' | 'no_winner';
  if (confidence < MIN_WINNER_CONFIDENCE) {
    winner = 'no_winner';
  } else if (isLowerBetter) {
    // A variant that converted nobody has no meaningful cost per acquisition.
    // calculateCPA is defined elsewhere; if it reports 0 in that case, a plain
    // "lower is better" comparison would crown the variant with no conversions,
    // so decide those cases from the conversion counts instead.
    const aConverted = aggA.conversions > 0;
    const bConverted = aggB.conversions > 0;
    if (!aConverted && !bConverted) {
      winner = 'no_winner';
    } else if (!aConverted) {
      winner = 'b';
    } else if (!bConverted) {
      winner = 'a';
    } else {
      winner = metricA < metricB ? 'a' : 'b';
    }
  } else {
    winner = metricA > metricB ? 'a' : 'b';
  }

  const winnerVal = winner === 'a' ? metricA : winner === 'b' ? metricB : 0;
  const loserVal = winner === 'a' ? metricB : winner === 'b' ? metricA : 0;

  // Lift is the winner's improvement relative to the loser. For a
  // lower-is-better metric the improvement is the reduction, so the reported
  // lift stays positive instead of flipping sign.
  const improvement = isLowerBetter ? loserVal - winnerVal : winnerVal - loserVal;
  const liftPercent =
    loserVal > 0 && Number.isFinite(loserVal) ? round((improvement / loserVal) * 100) : 0;

  // Sample size check (minimum 100 clicks per variant for meaningful results)
  const sampleSizeSufficient =
    aggA.clicks >= MIN_CLICKS_PER_VARIANT && aggB.clicks >= MIN_CLICKS_PER_VARIANT;

  let recommendation: string;
  if (!sampleSizeSufficient) {
    recommendation = `Insufficient data. Variant A has ${aggA.clicks} clicks, Variant B has ${aggB.clicks}. Need at least ${MIN_CLICKS_PER_VARIANT} clicks per variant for reliable results. Continue running the test.`;
  } else if (winner === 'no_winner') {
    recommendation = `No statistically significant winner yet (${round(confidence)}% confidence). Continue running the test or increase budget to reach significance faster.`;
  } else {
    const winnerName = winner === 'a' ? campA.name : campB.name;
    recommendation = `"${winnerName}" is the winner with ${round(confidence)}% confidence and ${Math.abs(liftPercent)}% lift in ${primaryMetric.toUpperCase()}. Consider scaling the winner and pausing the loser.`;
  }

  return {
    test_name: `${campA.name} vs ${campB.name}`,
    variant_a: variantA,
    variant_b: variantB,
    winner,
    confidence_level: round(confidence),
    primary_metric: primaryMetric,
    lift_percent: liftPercent,
    recommendation,
    sample_size_sufficient: sampleSizeSufficient,
  };
}
// - src/services/ab-test.ts:117-166 (helper) Statistical significance calculator: uses two-proportion Z-test (with normalCDF) for CTR/conversion_rate, heuristic based on sample size & effect size for CPA/ROAS metrics.
/**
 * Estimates how confident we can be (0–99.9, in percent) that two variants
 * differ on `metric`.
 *
 * - `ctr` and `conversion_rate` use a pooled two-proportion Z-test
 *   (clicks/impressions and conversions/clicks respectively), converted to a
 *   two-sided confidence via `normalCDF`.
 * - Every other metric value (`cpa`, `roas`) uses a heuristic that scales the
 *   relative difference by the total number of conversions.
 *
 * @param a - Aggregated totals for variant A.
 * @param b - Aggregated totals for variant B.
 * @param metric - Metric being compared.
 * @returns Confidence in percent; 0 whenever the inputs cannot support a
 *   comparison (no samples, no variance, or rates outside the 0–1 range).
 */
function calculateSignificance(
  a: { impressions: number; clicks: number; conversions: number; spend?: number; revenue?: number },
  b: { impressions: number; clicks: number; conversions: number; spend?: number; revenue?: number },
  metric: string,
): number {
  const MAX_CONFIDENCE = 99.9;

  // For proportion-based metrics (CTR, conversion_rate): Z-test for proportions
  if (metric === 'ctr' || metric === 'conversion_rate') {
    let pA: number, pB: number, nA: number, nB: number;
    if (metric === 'ctr') {
      pA = a.impressions > 0 ? a.clicks / a.impressions : 0;
      pB = b.impressions > 0 ? b.clicks / b.impressions : 0;
      nA = a.impressions;
      nB = b.impressions;
    } else {
      pA = a.clicks > 0 ? a.conversions / a.clicks : 0;
      pB = b.clicks > 0 ? b.conversions / b.clicks : 0;
      nA = a.clicks;
      nB = b.clicks;
    }

    // No trials on either side means there is nothing to compare.
    if (!(nA > 0) || !(nB > 0)) return 0;

    const pooledP = (pA * nA + pB * nB) / (nA + nB);
    // The pooled rate must lie strictly inside (0, 1). Outside it (e.g. more
    // conversions than clicks) the variance term goes negative and the square
    // root below would yield NaN, which the caller's `confidence < 90` check
    // would not reject.
    if (!(pooledP > 0 && pooledP < 1)) return 0;

    const se = Math.sqrt(pooledP * (1 - pooledP) * (1 / nA + 1 / nB));
    if (!(se > 0)) return 0;

    const z = Math.abs(pA - pB) / se;
    // Two-sided confidence: P(|Z| <= z) = 2 * Phi(z) - 1.
    const confidence = (normalCDF(z) * 2 - 1) * 100;
    if (Number.isNaN(confidence)) return 0;
    return Math.max(0, Math.min(MAX_CONFIDENCE, confidence));
  }

  // For continuous metrics (CPA, ROAS): use sample size heuristic.
  // Without per-day granularity, approximate confidence from conversion counts.
  const totalConversions = a.conversions + b.conversions;
  if (totalConversions < 10) return 0;

  // Larger sample = higher weight; the weight saturates at 100 total conversions.
  const sampleFactor = Math.min(totalConversions / 100, 1);

  let metricDiff: number;
  let avgMetric: number;
  if (metric === 'cpa') {
    const cpaA = (a.spend ?? 0) / Math.max(a.conversions, 1);
    const cpaB = (b.spend ?? 0) / Math.max(b.conversions, 1);
    metricDiff = Math.abs(cpaA - cpaB);
    avgMetric = ((a.spend ?? 0) + (b.spend ?? 0)) / Math.max(a.conversions + b.conversions, 1);
  } else {
    // NOTE(review): the denominators are clamped to at least 1, so a spend
    // below 1 is treated as 1 here — confirm that is intended.
    const roasA = (a.revenue ?? 0) / Math.max(a.spend ?? 1, 1);
    const roasB = (b.revenue ?? 0) / Math.max(b.spend ?? 1, 1);
    metricDiff = Math.abs(roasA - roasB);
    avgMetric = ((a.revenue ?? 0) + (b.revenue ?? 0)) / Math.max((a.spend ?? 1) + (b.spend ?? 1), 1);
  }

  if (avgMetric === 0) return 0;

  // Relative difference, weighted by sample size, scaled to a percentage and
  // capped at 99.9.
  const effectSize = metricDiff / avgMetric;
  const confidence = Math.min(MAX_CONFIDENCE, effectSize * sampleFactor * 200);
  return Math.max(0, confidence);
}
// - src/models/adops.ts:474-478 (schema) Input schema (ABTestAnalyzeInputSchema): validates campaign_id_a (UUID), campaign_id_b (UUID), and primary_metric (enum: ctr|cpa|roas|conversion_rate, default ctr).
/**
 * Input accepted by the `ab_test_analyze` tool: the two campaigns to compare
 * (both UUIDs) and the metric that decides the winner, which defaults to `ctr`.
 */
export const ABTestAnalyzeInputSchema = z.object({
  campaign_id_a: z.string().uuid().describe('Variant A campaign'),
  campaign_id_b: z.string().uuid().describe('Variant B campaign'),
  // Described like its siblings so generated schema docs do not show an empty cell.
  primary_metric: z
    .enum(['ctr', 'cpa', 'roas', 'conversion_rate'])
    .default('ctr')
    .describe('Metric used to compare the variants and pick the winner'),
});
// - src/models/adops.ts:223-252 (schema) Output schema (ABTestResultSchema): defines the structure of the A/B test result including variant details, winner, confidence_level, lift_percent, recommendation, and sample_size_sufficient.
/** Shape shared by both variant summaries in an A/B test result. */
const abTestVariantSchema = z.object({
  name: z.string(),
  impressions: z.number(),
  clicks: z.number(),
  conversions: z.number(),
  spend: z.number(),
  ctr: z.number(),
  cpa: z.number(),
  roas: z.number(),
});

/**
 * Result of an A/B test comparison: a summary per variant, the winner (or
 * `no_winner`), the confidence and lift percentages, a human-readable
 * recommendation, and a flag for whether the sample size was sufficient.
 */
export const ABTestResultSchema = z.object({
  test_name: z.string(),
  variant_a: abTestVariantSchema,
  variant_b: abTestVariantSchema,
  winner: z.enum(['a', 'b', 'no_winner']),
  confidence_level: z.number().min(0).max(100).describe('Statistical significance (%)'),
  primary_metric: z.string(),
  lift_percent: z.number().describe('% improvement of winner over loser'),
  recommendation: z.string(),
  sample_size_sufficient: z.boolean(),
});

/** Static type inferred from {@link ABTestResultSchema}. */
export type ABTestResult = z.infer<typeof ABTestResultSchema>;
// - src/index.ts:548-566 (registration) Tool registration: MCP server.registerTool('ab_test_analyze', ...) with title, description, inputSchema, annotations, and handler that calls ensureProOrReject then analyzeABTest.
// ── Tool 12: ab_test_analyze ────────────────────────────────────────
// Registers the A/B test tool. The handler first runs the licence gate
// (ensureProOrReject) and returns its rejection if there is one; otherwise it
// runs analyzeABTest and returns the result as pretty-printed JSON text.
// Any thrown error is converted by handleToolError.
server.registerTool(
  'ab_test_analyze',
  {
    title: 'A/B Test Analysis',
    // The description must match what the input schema accepts and what the
    // handler returns: clients rely on it to build calls and read results.
    description:
      'Compare two campaigns as A/B test variants and determine statistical significance. Input: campaign_id_a, campaign_id_b, primary_metric ("ctr"|"conversion_rate"|"roas"|"cpa", default "ctr"). Runs a two-proportion z-test for ctr and conversion_rate, and a sample-size/effect-size heuristic for cpa and roas. Returns {test_name, variant_a, variant_b, winner ("a"|"b"|"no_winner"), confidence_level, primary_metric, lift_percent, recommendation, sample_size_sufficient}. A winner is declared only at ≥90% confidence; treat the result as reliable only when sample_size_sufficient is true (at least 100 clicks per variant).',
    inputSchema: ABTestAnalyzeInputSchema,
    annotations: {
      readOnlyHint: true,
      destructiveHint: false,
      idempotentHint: true,
      openWorldHint: false,
    },
  },
  async ({ campaign_id_a, campaign_id_b, primary_metric }) => {
    try {
      const reject = await ensureProOrReject(LICENSE_CONFIG, 'ab_test_analyze');
      if (reject) return reject;
      const result = await analyzeABTest(campaign_id_a, campaign_id_b, primary_metric);
      return { content: [{ type: 'text' as const, text: JSON.stringify(result, null, 2) }] };
    } catch (e) {
      return handleToolError(e);
    }
  },
);