FFBB MCP Server

FFBB-MCP-Server
.agent
skills
app-store-optimization

ab_test_planner.py•22.3 KiB

""" A/B testing module for App Store Optimization. Plans and tracks A/B tests for metadata and visual assets. """ from typing import Dict, List, Any, Optional import math class ABTestPlanner: """Plans and tracks A/B tests for ASO elements.""" # Minimum detectable effect sizes (conservative estimates) MIN_EFFECT_SIZES = { 'icon': 0.10, # 10% conversion improvement 'screenshot': 0.08, # 8% conversion improvement 'title': 0.05, # 5% conversion improvement 'description': 0.03 # 3% conversion improvement } # Statistical confidence levels CONFIDENCE_LEVELS = { 'high': 0.95, # 95% confidence 'standard': 0.90, # 90% confidence 'exploratory': 0.80 # 80% confidence } def __init__(self): """Initialize A/B test planner.""" self.active_tests = [] def design_test( self, test_type: str, variant_a: Dict[str, Any], variant_b: Dict[str, Any], hypothesis: str, success_metric: str = 'conversion_rate' ) -> Dict[str, Any]: """ Design an A/B test with hypothesis and variables. Args: test_type: Type of test ('icon', 'screenshot', 'title', 'description') variant_a: Control variant details variant_b: Test variant details hypothesis: Expected outcome hypothesis success_metric: Metric to optimize Returns: Test design with configuration """ test_design = { 'test_id': self._generate_test_id(test_type), 'test_type': test_type, 'hypothesis': hypothesis, 'variants': { 'a': { 'name': 'Control', 'details': variant_a, 'traffic_split': 0.5 }, 'b': { 'name': 'Variation', 'details': variant_b, 'traffic_split': 0.5 } }, 'success_metric': success_metric, 'secondary_metrics': self._get_secondary_metrics(test_type), 'minimum_effect_size': self.MIN_EFFECT_SIZES.get(test_type, 0.05), 'recommended_confidence': 'standard', 'best_practices': self._get_test_best_practices(test_type) } self.active_tests.append(test_design) return test_design def calculate_sample_size( self, baseline_conversion: float, minimum_detectable_effect: float, confidence_level: str = 'standard', power: float = 0.80 ) -> Dict[str, Any]: """ 
Calculate required sample size for statistical significance. Args: baseline_conversion: Current conversion rate (0-1) minimum_detectable_effect: Minimum effect size to detect (0-1) confidence_level: 'high', 'standard', or 'exploratory' power: Statistical power (typically 0.80 or 0.90) Returns: Sample size calculation with duration estimates """ alpha = 1 - self.CONFIDENCE_LEVELS[confidence_level] beta = 1 - power # Expected conversion for variant B expected_conversion_b = baseline_conversion * (1 + minimum_detectable_effect) # Z-scores for alpha and beta z_alpha = self._get_z_score(1 - alpha / 2) # Two-tailed test z_beta = self._get_z_score(power) # Pooled standard deviation p_pooled = (baseline_conversion + expected_conversion_b) / 2 sd_pooled = math.sqrt(2 * p_pooled * (1 - p_pooled)) # Sample size per variant n_per_variant = math.ceil( ((z_alpha + z_beta) ** 2 * sd_pooled ** 2) / ((expected_conversion_b - baseline_conversion) ** 2) ) total_sample_size = n_per_variant * 2 # Estimate duration based on typical traffic duration_estimates = self._estimate_test_duration( total_sample_size, baseline_conversion ) return { 'sample_size_per_variant': n_per_variant, 'total_sample_size': total_sample_size, 'baseline_conversion': baseline_conversion, 'expected_conversion_improvement': minimum_detectable_effect, 'expected_conversion_b': expected_conversion_b, 'confidence_level': confidence_level, 'statistical_power': power, 'duration_estimates': duration_estimates, 'recommendations': self._generate_sample_size_recommendations( n_per_variant, duration_estimates ) } def calculate_significance( self, variant_a_conversions: int, variant_a_visitors: int, variant_b_conversions: int, variant_b_visitors: int ) -> Dict[str, Any]: """ Calculate statistical significance of test results. 
Args: variant_a_conversions: Conversions for control variant_a_visitors: Visitors for control variant_b_conversions: Conversions for variation variant_b_visitors: Visitors for variation Returns: Significance analysis with decision recommendation """ # Calculate conversion rates rate_a = variant_a_conversions / variant_a_visitors if variant_a_visitors > 0 else 0 rate_b = variant_b_conversions / variant_b_visitors if variant_b_visitors > 0 else 0 # Calculate improvement if rate_a > 0: relative_improvement = (rate_b - rate_a) / rate_a else: relative_improvement = 0 absolute_improvement = rate_b - rate_a # Calculate standard error se_a = math.sqrt(rate_a * (1 - rate_a) / variant_a_visitors) if variant_a_visitors > 0 else 0 se_b = math.sqrt(rate_b * (1 - rate_b) / variant_b_visitors) if variant_b_visitors > 0 else 0 se_diff = math.sqrt(se_a**2 + se_b**2) # Calculate z-score z_score = absolute_improvement / se_diff if se_diff > 0 else 0 # Calculate p-value (two-tailed) p_value = 2 * (1 - self._standard_normal_cdf(abs(z_score))) # Determine significance is_significant_95 = p_value < 0.05 is_significant_90 = p_value < 0.10 # Generate decision decision = self._generate_test_decision( relative_improvement, is_significant_95, is_significant_90, variant_a_visitors + variant_b_visitors ) return { 'variant_a': { 'conversions': variant_a_conversions, 'visitors': variant_a_visitors, 'conversion_rate': round(rate_a, 4) }, 'variant_b': { 'conversions': variant_b_conversions, 'visitors': variant_b_visitors, 'conversion_rate': round(rate_b, 4) }, 'improvement': { 'absolute': round(absolute_improvement, 4), 'relative_percentage': round(relative_improvement * 100, 2) }, 'statistical_analysis': { 'z_score': round(z_score, 3), 'p_value': round(p_value, 4), 'is_significant_95': is_significant_95, 'is_significant_90': is_significant_90, 'confidence_level': '95%' if is_significant_95 else ('90%' if is_significant_90 else 'Not significant') }, 'decision': decision } def track_test_results( 
self, test_id: str, results_data: Dict[str, Any] ) -> Dict[str, Any]: """ Track ongoing test results and provide recommendations. Args: test_id: Test identifier results_data: Current test results Returns: Test tracking report with next steps """ # Find test test = next((t for t in self.active_tests if t['test_id'] == test_id), None) if not test: return {'error': f'Test {test_id} not found'} # Calculate significance significance = self.calculate_significance( results_data['variant_a_conversions'], results_data['variant_a_visitors'], results_data['variant_b_conversions'], results_data['variant_b_visitors'] ) # Calculate test progress total_visitors = results_data['variant_a_visitors'] + results_data['variant_b_visitors'] required_sample = results_data.get('required_sample_size', 10000) progress_percentage = min((total_visitors / required_sample) * 100, 100) # Generate recommendations recommendations = self._generate_tracking_recommendations( significance, progress_percentage, test['test_type'] ) return { 'test_id': test_id, 'test_type': test['test_type'], 'progress': { 'total_visitors': total_visitors, 'required_sample_size': required_sample, 'progress_percentage': round(progress_percentage, 1), 'is_complete': progress_percentage >= 100 }, 'current_results': significance, 'recommendations': recommendations, 'next_steps': self._determine_next_steps( significance, progress_percentage ) } def generate_test_report( self, test_id: str, final_results: Dict[str, Any] ) -> Dict[str, Any]: """ Generate final test report with insights and recommendations. 
Args: test_id: Test identifier final_results: Final test results Returns: Comprehensive test report """ test = next((t for t in self.active_tests if t['test_id'] == test_id), None) if not test: return {'error': f'Test {test_id} not found'} significance = self.calculate_significance( final_results['variant_a_conversions'], final_results['variant_a_visitors'], final_results['variant_b_conversions'], final_results['variant_b_visitors'] ) # Generate insights insights = self._generate_test_insights( test, significance, final_results ) # Implementation plan implementation_plan = self._create_implementation_plan( test, significance ) return { 'test_summary': { 'test_id': test_id, 'test_type': test['test_type'], 'hypothesis': test['hypothesis'], 'duration_days': final_results.get('duration_days', 'N/A') }, 'results': significance, 'insights': insights, 'implementation_plan': implementation_plan, 'learnings': self._extract_learnings(test, significance) } def _generate_test_id(self, test_type: str) -> str: """Generate unique test ID.""" import time timestamp = int(time.time()) return f"{test_type}_{timestamp}" def _get_secondary_metrics(self, test_type: str) -> List[str]: """Get secondary metrics to track for test type.""" metrics_map = { 'icon': ['tap_through_rate', 'impression_count', 'brand_recall'], 'screenshot': ['tap_through_rate', 'time_on_page', 'scroll_depth'], 'title': ['impression_count', 'tap_through_rate', 'search_visibility'], 'description': ['time_on_page', 'scroll_depth', 'tap_through_rate'] } return metrics_map.get(test_type, ['tap_through_rate']) def _get_test_best_practices(self, test_type: str) -> List[str]: """Get best practices for specific test type.""" practices_map = { 'icon': [ 'Test only one element at a time (color vs. style vs. 
symbolism)', 'Ensure icon is recognizable at small sizes (60x60px)', 'Consider cultural context for global audience', 'Test against top competitor icons' ], 'screenshot': [ 'Test order of screenshots (users see first 2-3)', 'Use captions to tell story', 'Show key features and benefits', 'Test with and without device frames' ], 'title': [ 'Test keyword variations, not major rebrand', 'Keep brand name consistent', 'Ensure title fits within character limits', 'Test on both search and browse contexts' ], 'description': [ 'Test structure (bullet points vs. paragraphs)', 'Test call-to-action placement', 'Test feature vs. benefit focus', 'Maintain keyword density' ] } return practices_map.get(test_type, ['Test one variable at a time']) def _estimate_test_duration( self, required_sample_size: int, baseline_conversion: float ) -> Dict[str, Any]: """Estimate test duration based on typical traffic levels.""" # Assume different daily traffic scenarios traffic_scenarios = { 'low': 100, # 100 page views/day 'medium': 1000, # 1000 page views/day 'high': 10000 # 10000 page views/day } estimates = {} for scenario, daily_views in traffic_scenarios.items(): days = math.ceil(required_sample_size / daily_views) estimates[scenario] = { 'daily_page_views': daily_views, 'estimated_days': days, 'estimated_weeks': round(days / 7, 1) } return estimates def _generate_sample_size_recommendations( self, sample_size: int, duration_estimates: Dict[str, Any] ) -> List[str]: """Generate recommendations based on sample size.""" recommendations = [] if sample_size > 50000: recommendations.append( "Large sample size required - consider testing smaller effect size or increasing traffic" ) if duration_estimates['medium']['estimated_days'] > 30: recommendations.append( "Long test duration - consider higher minimum detectable effect or focus on high-impact changes" ) if duration_estimates['low']['estimated_days'] > 60: recommendations.append( "Insufficient traffic for reliable testing - consider user 
acquisition or broader targeting" ) if not recommendations: recommendations.append("Sample size and duration are reasonable for this test") return recommendations def _get_z_score(self, percentile: float) -> float: """Get z-score for given percentile (approximation).""" # Common z-scores z_scores = { 0.80: 0.84, 0.85: 1.04, 0.90: 1.28, 0.95: 1.645, 0.975: 1.96, 0.99: 2.33 } return z_scores.get(percentile, 1.96) def _standard_normal_cdf(self, z: float) -> float: """Approximate standard normal cumulative distribution function.""" # Using error function approximation t = 1.0 / (1.0 + 0.2316419 * abs(z)) d = 0.3989423 * math.exp(-z * z / 2.0) p = d * t * (0.3193815 + t * (-0.3565638 + t * (1.781478 + t * (-1.821256 + t * 1.330274)))) if z > 0: return 1.0 - p else: return p def _generate_test_decision( self, improvement: float, is_significant_95: bool, is_significant_90: bool, total_visitors: int ) -> Dict[str, Any]: """Generate test decision and recommendation.""" if total_visitors < 1000: return { 'decision': 'continue', 'rationale': 'Insufficient data - continue test to reach minimum sample size', 'action': 'Keep test running' } if is_significant_95: if improvement > 0: return { 'decision': 'implement_b', 'rationale': f'Variant B shows {improvement*100:.1f}% improvement with 95% confidence', 'action': 'Implement Variant B' } else: return { 'decision': 'keep_a', 'rationale': 'Variant A performs better with 95% confidence', 'action': 'Keep current version (A)' } elif is_significant_90: if improvement > 0: return { 'decision': 'implement_b_cautiously', 'rationale': f'Variant B shows {improvement*100:.1f}% improvement with 90% confidence', 'action': 'Consider implementing B, monitor closely' } else: return { 'decision': 'keep_a', 'rationale': 'Variant A performs better with 90% confidence', 'action': 'Keep current version (A)' } else: return { 'decision': 'inconclusive', 'rationale': 'No statistically significant difference detected', 'action': 'Either keep A or test 
different hypothesis' } def _generate_tracking_recommendations( self, significance: Dict[str, Any], progress: float, test_type: str ) -> List[str]: """Generate recommendations for ongoing test.""" recommendations = [] if progress < 50: recommendations.append( f"Test is {progress:.0f}% complete - continue collecting data" ) if progress >= 100: if significance['statistical_analysis']['is_significant_95']: recommendations.append( "Sufficient data collected with significant results - ready to conclude test" ) else: recommendations.append( "Sample size reached but no significant difference - consider extending test or concluding" ) return recommendations def _determine_next_steps( self, significance: Dict[str, Any], progress: float ) -> str: """Determine next steps for test.""" if progress < 100: return f"Continue test until reaching 100% sample size (currently {progress:.0f}%)" decision = significance.get('decision', {}).get('decision', 'inconclusive') if decision == 'implement_b': return "Implement Variant B and monitor metrics for 2 weeks" elif decision == 'keep_a': return "Keep Variant A and design new test with different hypothesis" else: return "Test inconclusive - either keep A or design new test" def _generate_test_insights( self, test: Dict[str, Any], significance: Dict[str, Any], results: Dict[str, Any] ) -> List[str]: """Generate insights from test results.""" insights = [] improvement = significance['improvement']['relative_percentage'] if significance['statistical_analysis']['is_significant_95']: insights.append( f"Strong evidence: Variant B {'improved' if improvement > 0 else 'decreased'} " f"conversion by {abs(improvement):.1f}% with 95% confidence" ) insights.append( f"Tested {test['test_type']} changes: {test['hypothesis']}" ) # Add context-specific insights if test['test_type'] == 'icon' and improvement > 5: insights.append( "Icon change had substantial impact - visual first impression is critical" ) return insights def _create_implementation_plan( 
self, test: Dict[str, Any], significance: Dict[str, Any] ) -> List[Dict[str, str]]: """Create implementation plan for winning variant.""" plan = [] if significance.get('decision', {}).get('decision') == 'implement_b': plan.append({ 'step': '1. Update store listing', 'details': f"Replace {test['test_type']} with Variant B across all platforms" }) plan.append({ 'step': '2. Monitor metrics', 'details': 'Track conversion rate for 2 weeks to confirm sustained improvement' }) plan.append({ 'step': '3. Document learnings', 'details': 'Record insights for future optimization' }) return plan def _extract_learnings( self, test: Dict[str, Any], significance: Dict[str, Any] ) -> List[str]: """Extract key learnings from test.""" learnings = [] improvement = significance['improvement']['relative_percentage'] learnings.append( f"Testing {test['test_type']} can yield {abs(improvement):.1f}% conversion change" ) if test['test_type'] == 'title': learnings.append( "Title changes affect search visibility and user perception" ) elif test['test_type'] == 'screenshot': learnings.append( "First 2-3 screenshots are critical for conversion" ) return learnings def plan_ab_test( test_type: str, variant_a: Dict[str, Any], variant_b: Dict[str, Any], hypothesis: str, baseline_conversion: float ) -> Dict[str, Any]: """ Convenience function to plan an A/B test. Args: test_type: Type of test variant_a: Control variant variant_b: Test variant hypothesis: Test hypothesis baseline_conversion: Current conversion rate Returns: Complete test plan """ planner = ABTestPlanner() test_design = planner.design_test( test_type, variant_a, variant_b, hypothesis ) sample_size = planner.calculate_sample_size( baseline_conversion, planner.MIN_EFFECT_SIZES.get(test_type, 0.05) ) return { 'test_design': test_design, 'sample_size_requirements': sample_size }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/nickdesi/FFBB-MCP-Server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

ab_test_planner.py•22.3 KiB