Skip to main content
Glama

StatFlow

run_analysis.py•34 kB
"""
Direct Analysis Runner - Generate Excel with Modular Architecture

Run this script to create your experiment analysis Excel file directly.

The analysis functions are now modular and reusable:
- Statistical analysis (t-tests): src/statflow/analysis/statistical_analysis.py
- Table generators: src/statflow/analysis/table_generators.py
- AI insights: src/statflow/analysis/ai_insights.py

Usage: python run_analysis.py
"""

import json
import sys
import os
from datetime import datetime

# Add src to path so the statflow package resolves when run from the repo root.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

import mysql.connector
from mysql.connector import Error
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
import openpyxl.utils

from statflow.query_builder import QueryBuilder
from statflow.analysis import (
    create_ttest_table,
    generate_ai_insights,
    create_avg_metric_table,
    create_section_summary_table,
    create_avg_gq_table,
    generate_word_report_with_real_data,
    create_ssd_vs_alt_ttest_table  # NOTE(review): imported but never used in this script — confirm before removing
)


def load_config():
    """Load database configuration from config.json.

    Returns:
        dict: Parsed configuration.

    Exits the process with status 1 if the file is missing or malformed,
    printing a user-facing hint first.
    """
    try:
        with open('config.json', 'r') as f:
            return json.load(f)
    except FileNotFoundError:
        print("[!] Error: config.json not found!")
        print("    Please create config.json with your MySQL credentials.")
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"[!] Error parsing config.json: {e}")
        sys.exit(1)


def run_combined_query(config):
    """Run the combined query across both databases.

    Args:
        config: Full configuration dict; every key starting with
            'mysql_dump' is treated as one database's connection settings.

    Returns:
        tuple: (columns, data, query_builder) or (None, None, None) on error.
    """
    db1_config = config['mysql_dump']
    connection = None
    cursor = None
    try:
        # Connect to MySQL server (server-level; the query itself is
        # fully qualified with database names by the query builder).
        connection = mysql.connector.connect(
            host=db1_config['host'],
            port=db1_config['port'],
            user=db1_config['user'],
            password=db1_config['password']
        )
        cursor = connection.cursor(dictionary=True)

        # Build query using modular query builder.
        # Dynamically load all database configs from config.json
        # (sorted for a deterministic database order).
        databases = []
        for key in sorted(config.keys()):
            if key.startswith('mysql_dump'):
                databases.append(config[key])

        query_builder = QueryBuilder(databases)
        query = query_builder.build_query()

        print("[*] Running query for userIds with complete analysis...")
        cursor.execute(query)
        results = cursor.fetchall()
        columns = cursor.column_names

        # Count experts and novices for the progress message.
        experts = sum(1 for r in results if r['pre_test_binary'] == 'expert')
        novices = sum(1 for r in results if r['pre_test_binary'] == 'novice')
        print(f"[+] Fetched {len(results)} users: {experts} experts, {novices} novices")

        return columns, results, query_builder
    except Error as e:
        print(f"[!] MySQL Error: {e}")
        return None, None, None
    finally:
        # Always release DB resources, including on the error path
        # (the original only closed them on success, leaking the
        # connection whenever the query failed).
        if cursor is not None:
            try:
                cursor.close()
            except Error:
                pass
        if connection is not None and connection.is_connected():
            connection.close()


def export_to_excel(columns, data, output_path, query_builder, config, update_existing=True):
    """Export data to Excel with comprehensive formatting and analysis.

    Args:
        columns: List of column names from query
        data: List of dictionaries containing the data
        output_path: Path where Excel file should be saved
        query_builder: QueryBuilder instance for getting header sections
        config: Configuration dictionary
        update_existing: If True, update existing file; if False, create new

    Returns:
        bool: True if successful, False otherwise
    """
    try:
        # Load existing workbook or create new one.
        if update_existing and os.path.exists(output_path):
            print(f"[*] Updating existing file: {output_path}")
            workbook = openpyxl.load_workbook(output_path)

            # Remove old sheet names if they exist (to prevent duplicates).
            if "Combined_Data" in workbook.sheetnames:
                del workbook["Combined_Data"]
            if "Expert_Novice_Analysis" in workbook.sheetnames:
                del workbook["Expert_Novice_Analysis"]

            # Create new Expert_Novice_Analysis sheet at the beginning.
            if workbook.sheetnames:
                workbook.create_sheet("Expert_Novice_Analysis", 0)
            else:
                workbook.create_sheet("Expert_Novice_Analysis")
            sheet = workbook["Expert_Novice_Analysis"]
        else:
            print(f"[*] Creating new file: {output_path}")
            workbook = openpyxl.Workbook()
            sheet = workbook.active
            sheet.title = "Expert_Novice_Analysis"

        # Helper alias for get_column_letter.
        gcl = get_column_letter

        # Row 1: Merged headers (dynamic from query builder).
        header_sections = query_builder.get_header_sections()
        for section in header_sections:
            cell_range = f"{section['start_col']}1:{section['end_col']}1"
            sheet.merge_cells(cell_range)
            cell = sheet[f"{section['start_col']}1"]
            cell.value = section['name']
            cell.font = Font(bold=True, size=12, color="1F4E78")
            cell.alignment = Alignment(horizontal="center", vertical="center")

        # Shared style objects, hoisted so they are built once instead of
        # per cell inside the data loops below.
        light_grey_border = Side(style='thin', color='D3D3D3')
        thin_grey_box = Border(
            left=light_grey_border,
            right=light_grey_border,
            top=light_grey_border,
            bottom=light_grey_border
        )
        centered = Alignment(horizontal="center", vertical="center")

        # Row 2: Column headers.
        column_mapping = query_builder.get_column_mapping()
        for col_idx, col_name in enumerate(columns, start=1):
            cell = sheet.cell(row=2, column=col_idx)
            display_name = column_mapping.get(col_name, col_name)
            cell.value = display_name
            cell.font = Font(bold=True, size=10)
            cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
            # Add light grey border to headers.
            cell.border = thin_grey_box

        # Map column indices to section names for coloring.
        col_to_section = {}
        for section in header_sections:
            start_col_idx = openpyxl.utils.column_index_from_string(section['start_col'])
            end_col_idx = openpyxl.utils.column_index_from_string(section['end_col'])
            for col_idx in range(start_col_idx, end_col_idx + 1):
                col_to_section[col_idx] = section['name']

        # Track the row where experts end (to add bold line separator).
        expert_end_row = None

        # Define colors for each section.
        section_colors = {
            'User characteristics': 'FDE9D9',  # Orange
            'Time': 'E8F8E8',                  # Green
            'Accuracy': 'E6F2FF',              # Blue
            'Satisfaction': 'F4EBFA',          # Plum
            'Graph Questions': 'F2F2F2'        # Light Gray
        }

        # Data rows (starting from row 3).
        data_start_row = 3
        for row_idx, row_data in enumerate(data, start=data_start_row):
            for col_idx, col_name in enumerate(columns, start=1):
                cell = sheet.cell(row=row_idx, column=col_idx)
                value = row_data.get(col_name)

                # Handle None values.
                if value is None:
                    cell.value = ""
                else:
                    cell.value = value

                # Apply section-based coloring and light grey borders.
                section_name = col_to_section.get(col_idx)
                if section_name and section_name in section_colors:
                    color = section_colors[section_name]
                    cell.fill = PatternFill(start_color=color, end_color=color, fill_type="solid")
                    # Add light grey border for better distinction.
                    cell.border = thin_grey_box

                # Alignment.
                cell.alignment = centered

                # Number formatting for time columns (if it looks like a time value).
                if col_name.startswith('TIME_') and isinstance(value, (int, float)):
                    cell.number_format = '0.00'

            # Track where experts end.
            if row_data.get('pre_test_binary') == 'expert':
                expert_end_row = row_idx

        # Add bold border between experts and novices.
        if expert_end_row:
            thick_bottom = Side(style='medium', color='000000')
            for col_idx in range(1, len(columns) + 1):
                cell = sheet.cell(row=expert_end_row, column=col_idx)
                current_border = cell.border
                cell.border = Border(
                    left=current_border.left,
                    right=current_border.right,
                    top=current_border.top,
                    bottom=thick_bottom
                )

        # AVERAGE row (after all data).
        average_row = data_start_row + len(data)

        # Set row height for average row.
        sheet.row_dimensions[average_row].height = 20

        for col_idx, col_name in enumerate(columns, start=1):
            cell = sheet.cell(row=average_row, column=col_idx)

            # Special handling for certain columns.
            if col_name == 'unique_userId':
                cell.value = "AVERAGE"
                cell.font = Font(bold=True, size=11, color="FFFFFF")
            elif col_name == 'pre_study_testscore':
                cell.value = ""  # Don't average the test score
            elif col_name == 'pre_test_binary':
                cell.value = ""  # Don't average binary values
            else:
                # Calculate average for numeric columns (Excel formula so it
                # stays live if the data rows are edited in the workbook).
                col_letter = gcl(col_idx)
                cell.value = f'=AVERAGE({col_letter}{data_start_row}:{col_letter}{average_row-1})'
                cell.number_format = '0.00'
                cell.font = Font(bold=True, size=10, color="FFFFFF")

            # Orange background for average row with light grey border.
            cell.fill = PatternFill(start_color="F4B084", end_color="F4B084", fill_type="solid")
            cell.alignment = centered
            # Add light grey border for consistency.
            cell.border = thin_grey_box

        # Add bold borders around each section.
        thick_border = Side(style='medium', color='000000')
        for section in header_sections:
            # Get column indices.
            start_col_idx = openpyxl.utils.column_index_from_string(section['start_col'])
            end_col_idx = openpyxl.utils.column_index_from_string(section['end_col'])

            # Apply thick borders to section edges (rows 1 to average_row).
            for row_idx in range(1, average_row + 1):
                # Left border.
                left_cell = sheet.cell(row=row_idx, column=start_col_idx)
                left_cell.border = Border(
                    left=thick_border,
                    right=left_cell.border.right,
                    top=left_cell.border.top,
                    bottom=left_cell.border.bottom
                )
                # Right border.
                right_cell = sheet.cell(row=row_idx, column=end_col_idx)
                right_cell.border = Border(
                    left=right_cell.border.left,
                    right=thick_border,
                    top=right_cell.border.top,
                    bottom=right_cell.border.bottom
                )

        # Auto-adjust column widths.
        for col_idx in range(1, len(columns) + 1):
            max_length = 0
            col_letter = gcl(col_idx)
            for row in sheet[col_letter]:
                try:
                    if row.value:
                        max_length = max(max_length, len(str(row.value)))
                except Exception:
                    # Narrowed from a bare `except:` which also swallowed
                    # KeyboardInterrupt/SystemExit.
                    pass
            adjusted_width = min(max_length + 2, 30)  # Cap at 30
            sheet.column_dimensions[col_letter].width = max(adjusted_width, 12)

        # Freeze panes: Keep headers (rows 1-2) and first column (unique_userId)
        # visible while scrolling. Freeze at B3 means everything above row 3
        # and left of column B will be frozen.
        sheet.freeze_panes = 'B3'

        # ===== ADD STATISTICAL ANALYSIS TABLES =====

        # Determine row ranges for experts and novices.
        expert_start = data_start_row
        expert_end = expert_end_row if expert_end_row else data_start_row
        novice_start = expert_end + 1 if expert_end_row else data_start_row
        novice_end = average_row - 1

        # Start t-test tables below the average row.
        ttest_start_row = average_row + 3

        # Find column positions for each section.
        time_section_col = None
        accuracy_section_col = None
        satisfaction_section_col = None
        graph_questions_section_col = None

        for section in header_sections:
            if section['name'] == 'Time':
                time_section_col = openpyxl.utils.column_index_from_string(section['start_col'])
            elif section['name'] == 'Accuracy':
                accuracy_section_col = openpyxl.utils.column_index_from_string(section['start_col'])
            elif section['name'] == 'Satisfaction':
                satisfaction_section_col = openpyxl.utils.column_index_from_string(section['start_col'])
            elif section['name'] == 'Graph Questions':
                graph_questions_section_col = openpyxl.utils.column_index_from_string(section['start_col'])

        # Create TIME T-TEST table under Time section.
        # (The graph lists were previously also defined once above this
        # point and immediately shadowed here; the dead duplicates were
        # removed.)
        if time_section_col:
            ssd_time_graphs = [
                ('TIME_EQU_1', 'EQU-1'),
                ('TIME_STU_2', 'STU-2'),
                ('TIME_GOA_3', 'GOA-3'),
                ('TIME_PAT_4', 'PAT-4'),
                ('TIME_ENR_5', 'ENR-5')
            ]
            alt_time_graphs = [
                ('TIME_EQU_6', 'EQU-6'),
                ('TIME_STU_7', 'STU-7'),
                ('TIME_GOA_8', 'GOA-8'),
                ('TIME_PAT_9', 'PAT-9'),
                ('TIME_ENR_10', 'ENR-10')
            ]
            time_aggregates = [
                ('AVG_TIME_SSD', 'Avg SSD'),
                ('AVG_TIME_ALT', 'Avg ALT')
            ]
            time_overall = ('AVG_OVERALL_TIME', 'Overall Avg')

            time_ttest_end_row = create_ttest_table(
                sheet, ttest_start_row, time_section_col,
                "TIME T-TEST", "27AE60", "16A085", "E8F8F5",
                ssd_time_graphs, alt_time_graphs, columns,
                expert_start, expert_end, novice_start, novice_end,
                time_aggregates, time_overall
            )

            # Add Average Time table below the t-test table.
            avg_time_table_start = time_ttest_end_row + 2
            avg_time_end_row = create_avg_metric_table(
                sheet, avg_time_table_start, time_section_col,
                columns, data_start_row, average_row - 1,
                'TIME', "AVERAGE TIME IN SECONDS BY GRAPH TYPE",
                "27AE60", "16A085"
            )

            # Add Time Summary table (SSD vs ALT).
            time_summary_start = avg_time_end_row + 2
            create_section_summary_table(
                sheet, time_summary_start, time_section_col,
                columns, average_row,
                'TIME', "OVERALL TIME: SSD vs ALT",
                "27AE60", "16A085"
            )

        # Create ACCURACY T-TEST table under Accuracy section.
        if accuracy_section_col:
            ssd_acc_graphs = [
                ('ACC_EQU_1', 'EQU-1'),
                ('ACC_STU_2', 'STU-2'),
                ('ACC_GOA_3', 'GOA-3'),
                ('ACC_PAT_4', 'PAT-4'),
                ('ACC_ENR_5', 'ENR-5')
            ]
            alt_acc_graphs = [
                ('ACC_EQU_6', 'EQU-6'),
                ('ACC_STU_7', 'STU-7'),
                ('ACC_GOA_8', 'GOA-8'),
                ('ACC_PAT_9', 'PAT-9'),
                ('ACC_ENR_10', 'ENR-10')
            ]
            accuracy_aggregates = [
                ('AVG_ACC_SSD', 'Avg SSD'),
                ('AVG_ACC_ALT', 'Avg ALT')
            ]
            acc_overall = ('AVG_OVERALL_ACC', 'Overall Avg')

            acc_ttest_end_row = create_ttest_table(
                sheet, ttest_start_row, accuracy_section_col,
                "ACCURACY T-TEST", "3498DB", "2980B9", "EBF5FB",
                ssd_acc_graphs, alt_acc_graphs, columns,
                expert_start, expert_end, novice_start, novice_end,
                accuracy_aggregates, acc_overall
            )

            # Add Average Accuracy table below the t-test table.
            avg_acc_table_start = acc_ttest_end_row + 2
            avg_acc_end_row = create_avg_metric_table(
                sheet, avg_acc_table_start, accuracy_section_col,
                columns, data_start_row, average_row - 1,
                'ACCURACY', "AVERAGE ACCURACY BY GRAPH TYPE",
                "3498DB", "2980B9"
            )

            # Add Accuracy Summary table (SSD vs ALT).
            acc_summary_start = avg_acc_end_row + 2
            create_section_summary_table(
                sheet, acc_summary_start, accuracy_section_col,
                columns, average_row,
                'ACCURACY', "OVERALL ACCURACY: SSD vs ALT",
                "3498DB", "2980B9"
            )

        # Create SATISFACTION T-TEST table under Satisfaction section.
        if satisfaction_section_col:
            ssd_sat_graphs = [
                ('SAT_EQU_1', 'EQU-1'),
                ('SAT_STU_2', 'STU-2'),
                ('SAT_GOA_3', 'GOA-3'),
                ('SAT_PAT_4', 'PAT-4'),
                ('SAT_ENR_5', 'ENR-5')
            ]
            alt_sat_graphs = [
                ('SAT_EQU_6', 'EQU-6'),
                ('SAT_STU_7', 'STU-7'),
                ('SAT_GOA_8', 'GOA-8'),
                ('SAT_PAT_9', 'PAT-9'),
                ('SAT_ENR_10', 'ENR-10')
            ]
            satisfaction_aggregates = [
                ('AVG_SAT_SSD', 'Avg SSD'),
                ('AVG_SAT_ALT', 'Avg ALT')
            ]
            sat_overall = ('AVG_OVERALL_SAT', 'Overall Avg')

            sat_ttest_end_row = create_ttest_table(
                sheet, ttest_start_row, satisfaction_section_col,
                "SATISFACTION T-TEST", "9B59B6", "8E44AD", "F4ECF7",
                ssd_sat_graphs, alt_sat_graphs, columns,
                expert_start, expert_end, novice_start, novice_end,
                satisfaction_aggregates, sat_overall
            )

            # Add Average Satisfaction table below the t-test table.
            avg_sat_table_start = sat_ttest_end_row + 2
            avg_sat_end_row = create_avg_metric_table(
                sheet, avg_sat_table_start, satisfaction_section_col,
                columns, data_start_row, average_row - 1,
                'SATISFACTION', "AVERAGE SATISFACTION BY GRAPH TYPE",
                "9B59B6", "8E44AD"
            )

            # Add Satisfaction Summary table (SSD vs ALT).
            sat_summary_start = avg_sat_end_row + 2
            create_section_summary_table(
                sheet, sat_summary_start, satisfaction_section_col,
                columns, average_row,
                'SATISFACTION', "OVERALL SATISFACTION: SSD vs ALT",
                "9B59B6", "8E44AD"
            )

        # Create GRAPH QUESTIONS T-TEST table under Graph Questions section.
        if graph_questions_section_col:
            # Graph questions are only for graphs 1-5 (SSD graphs).
            # Each graph has 2 questions (GQ1 and GQ2).
            gq_questions = [
                ('GQ_EQU_1_1', 'EQU-1-1'),
                ('GQ_EQU_1_2', 'EQU-1-2'),
                ('GQ_STU_2_1', 'STU-2-1'),
                ('GQ_STU_2_2', 'STU-2-2'),
                ('GQ_GOA_3_1', 'GOA-3-1'),
                ('GQ_GOA_3_2', 'GOA-3-2'),
                ('GQ_PAT_4_1', 'PAT-4-1'),
                ('GQ_PAT_4_2', 'PAT-4-2'),
                ('GQ_ENR_5_1', 'ENR-5-1'),
                ('GQ_ENR_5_2', 'ENR-5-2')
            ]
            # No ALT graphs for graph questions, so pass empty list.
            alt_gq_questions = []
            gq_aggregates = [
                ('TOTAL_GQ_SCORE', 'Total GQ Score'),
                ('AVG_GQ_SCORE', 'Avg GQ Score')
            ]
            gq_overall = ('AVG_GQ_SCORE', 'Overall Avg')

            gq_ttest_end_row = create_ttest_table(
                sheet, ttest_start_row, graph_questions_section_col,
                "GRAPH QUESTIONS T-TEST", "F39C12", "E67E22", "FEF5E7",
                gq_questions, alt_gq_questions, columns,
                expert_start, expert_end, novice_start, novice_end,
                gq_aggregates, gq_overall
            )

            # Add Average Graph Questions table below the t-test table.
            avg_gq_table_start = gq_ttest_end_row + 2
            create_avg_gq_table(
                sheet, avg_gq_table_start, graph_questions_section_col,
                columns, data_start_row, average_row - 1,
                "AVERAGE GRAPH QUESTIONS PERFORMANCE BY GRAPH TYPE",
                "F39C12", "E67E22"
            )

        # ===== GENERATE AI INSIGHTS =====
        # AI insights generation is now handled by the modular function.
        # NOTE(review): called here with 4 args but with 2 args
        # (data, config) in main() — one of the two call sites is likely
        # stale; verify against statflow.analysis.ai_insights.
        generate_ai_insights(workbook, data, columns, config)

        # Save workbook.
        workbook.save(output_path)
        print(f"[+] Excel file saved: {output_path}")
        print(f"   - Row 1: Section headers (User characteristics, Time, Accuracy, Satisfaction, Graph Questions)")
        print(f"   - Row 2: Bold column headers")
        print(f"   - Row 3-{average_row-1}: {len(data)} data rows with color-coded sections:")
        print(f"     - User characteristics: Orange")
        print(f"     - Time: Green")
        print(f"     - Accuracy: Blue")
        print(f"     - Satisfaction: Plum")
        print(f"     - Graph Questions: Light Gray")
        print(f"   - Bold horizontal line separating Experts from Novices")
        print(f"   - Row {average_row}: AVERAGE row (orange background with white text)")
        print(f"   - Bold vertical borders around each section for visual distinction")
        print(f"   - Light grey borders on all colored cells for better readability")
        print(f"   - Freeze panes: Headers (rows 1-2) and first column stay visible when scrolling")
        print(f"   - T-Test Analysis tables starting at row {ttest_start_row}:")
        print(f"     * TIME T-Test positioned under Time section (includes SSD, ALT, Aggregates, Total Overall)")
        print(f"     * ACCURACY T-Test positioned under Accuracy section (includes SSD, ALT, Aggregates, Total Overall)")
        print(f"     * SATISFACTION T-Test positioned under Satisfaction section (includes SSD, ALT, Aggregates, Total Overall)")
        print(f"     * GRAPH QUESTIONS T-Test positioned under Graph Questions section (all 10 questions, Aggregates, Total Overall)")
        print(f"   - Average Metric tables (below each t-test):")
        print(f"     * Average Time in Seconds by Graph Type")
        print(f"     * Average Accuracy by Graph Type")
        print(f"     * Average Satisfaction by Graph Type")
        print(f"     * Average Graph Questions Performance by Graph Type")
        print(f"   - Overall Summary tables (SSD vs ALT comparison under each section)")
        if config.get('openai', {}).get('enabled', False):
            print(f"   - AI Insights sheet with comprehensive analysis")
        print(f"   - Other sheets preserved (if any existed)")

        return True
    except Exception as e:
        print(f"[!] Error creating Excel file: {e}")
        return False


def main():
    """Main execution."""
    print("=" * 60)
    print("  EXPERIMENT ANALYSIS - Direct Excel Export")
    print("  Now with Modular Architecture!")
    print("=" * 60)
    print()

    # Load configuration.
    print("[*] Loading configuration from config.json...")
    config = load_config()

    # Check credentials (the template ships with a placeholder username).
    if config['mysql_dump']['user'] == 'your_username':
        print()
        print("[!] WARNING: You need to update config.json first!")
        print("    Please edit config.json and set your MySQL credentials:")
        print("    - user: your MySQL username")
        print("    - password: your MySQL password")
        print()
        sys.exit(1)

    print(f"[+] Configuration loaded")
    print(f"    - Database 1: {config['mysql_dump']['database']}")
    print(f"    - Database 2: {config['mysql_dump_2']['database']}")
    print()

    # Run query.
    columns, data, query_builder = run_combined_query(config)

    if columns is None or data is None or query_builder is None:
        print()
        print("[!] Failed to fetch data. Please check:")
        print("    - MySQL server is running")
        print("    - Database credentials are correct")
        print("    - Both databases exist")
        print("    - Table 'mainstudy_responses' exists in both databases")
        sys.exit(1)

    print()

    # Prepare output path.
    output_dir = config['excel_output']['default_path']

    # Create output directory if it doesn't exist.
    if not os.path.exists(output_dir):
        print(f"šŸ“ Creating output directory: {output_dir}")
        os.makedirs(output_dir)

    # Use fixed filename (will update same file each time).
    output_file = os.path.join(output_dir, "experiment_analysis.xlsx")

    # Check if file exists.
    if os.path.exists(output_file):
        print(f"[!] File already exists: {output_file}")
        print(f"    The Combined_Data sheet will be refreshed with latest data.")

    # Export to Excel (will update if exists).
    print(f"[*] Exporting to Excel...")
    success = export_to_excel(columns, data, output_file, query_builder, config, update_existing=True)

    # Generate Word document.
    print(f"[*] Generating Word document...")
    word_output_file = os.path.join(output_dir, "experiment_analysis.docx")

    # Extract analysis data for Word document.
    ttest_results = {}
    avg_metrics = {}
    summary_tables = {}

    # Generate AI insights if enabled.
    ai_insights = None
    if config.get('openai', {}).get('enabled', False):
        try:
            # NOTE(review): 2-arg call here vs 4-arg call in
            # export_to_excel — confirm the real signature.
            ai_insights = generate_ai_insights(data, config)
        except Exception as e:
            print(f"[!] AI insights generation failed: {e}")
            ai_insights = None

    # Generate Word document (best-effort: failure here must not mask a
    # successful Excel export).
    try:
        word_path = generate_word_report_with_real_data(
            data, columns, config,
            ttest_results, avg_metrics, summary_tables,
            ai_insights, word_output_file
        )
        print(f"[+] Word document saved: {word_path}")
    except Exception as e:
        print(f"[!] Word document generation failed: {e}")
        print("    Excel file was still created successfully")

    print()
    print("=" * 60)
    if success:
        print("[SUCCESS]")
        print()
        print(f"Your analysis is ready at:")
        print(f"   šŸ“Š Excel: {output_file}")
        print(f"   šŸ“„ Word: {word_output_file}")
        print()
        print("What's in the files:")
        print(f"   [+] Row 1 Headers:")
        print(f"       - 'User characteristics' (B1:C1)")
        print(f"       - 'Time' (D1:S1) - 16 columns with SSD/ALT/Overall aggregates")
        print(f"       - 'Accuracy' (T1:AI1) - 16 columns with SSD/ALT/Overall aggregates")
        print(f"       - 'Satisfaction' (AJ1:AY1) - 16 columns with SSD/ALT/Overall aggregates")
        print(f"       - 'Graph Questions' (AZ1:BK1) - 12 columns with Total/Avg aggregates")
        print(f"   [+] Row 2: Column headers")
        print(f"       - Unique UserId | pre_study_testscore | pre_test_binary")
        print(f"       - Time: EQU-1 to ENR-10, Total/Avg SSD, Total/Avg ALT, Total/Avg Overall Time")
        print(f"       - Accuracy: EQU-1 to ENR-10, Total/Avg SSD, Total/Avg ALT, Total/Avg Overall Acc")
        print(f"       - Satisfaction: EQU-1 to ENR-10, Total/Avg SSD, Total/Avg ALT, Total/Avg Overall Sat")
        print(f"       - Graph Questions: EQU-1-1/2 to ENR-5-1/2, Total/Avg GQ Score")
        print(f"   [+] Row 3-{3+len(data)-1}: {len(data)} users with complete data")
        print(f"   [+] Color-coded sections (Orange, Green, Blue, Plum, Light Gray)")
        print(f"   [+] Bold horizontal line separating Experts from Novices")
        print(f"   [+] Row {3+len(data)}: AVERAGE row (orange background, white text)")
        print(f"       Note: pre_study_testscore excluded from average")
        print(f"   [+] Bold vertical borders separating each section visually")
        print(f"   [+] Light grey borders on all colored cells for better distinction")
        print(f"   [+] Freeze panes enabled: Headers and first column stay visible while scrolling")
        print(f"   [+] T-Test Analysis tables below the data:")
        print(f"       - TIME T-Test positioned under Time section columns")
        print(f"       - ACCURACY T-Test positioned under Accuracy section columns")
        print(f"       - SATISFACTION T-Test positioned under Satisfaction section columns")
        print(f"       - GRAPH QUESTIONS T-Test positioned under Graph Questions section columns")
        print(f"       - Time/Accuracy/Satisfaction compare Experts vs Novices for:")
        print(f"         * SSD (1-5)")
        print(f"         * ALT (6-10)")
        print(f"         * Aggregates (Avg SSD, Avg ALT)")
        print(f"         * TOTAL OVERALL (Highlighted row with overall average comparison)")
        print(f"       - Graph Questions compares Experts vs Novices for:")
        print(f"         * All 10 questions (EQU-1-1/2, STU-2-1/2, GOA-3-1/2, PAT-4-1/2, ENR-5-1/2)")
        print(f"         * Aggregates (Total GQ Score, Avg GQ Score)")
        print(f"         * TOTAL OVERALL (Highlighted row with overall average comparison)")
        print(f"   [+] Average Metric tables by Graph Type (EQU, STU, GOA, PAT, ENR):")
        print(f"       - Average Time in Seconds table under Time section")
        print(f"       - Average Accuracy table under Accuracy section")
        print(f"       - Average Satisfaction table under Satisfaction section")
        print(f"       - Average Graph Questions Performance table under Graph Questions section")
        print(f"       - Time/Accuracy/Satisfaction show SSD vs ALT values for all graph types")
        print(f"       - Graph Questions shows GQ1 vs GQ2 values for all graph types")
        print(f"   [+] Overall Summary tables (SSD vs ALT comparison):")
        print(f"       - OVERALL TIME: SSD vs ALT (under Time section)")
        print(f"       - OVERALL ACCURACY: SSD vs ALT (under Accuracy section)")
        print(f"       - OVERALL SATISFACTION: SSD vs ALT (under Satisfaction section)")
        if config.get('openai', {}).get('enabled', False):
            print(f"   [+] AI-Generated Insights (second sheet):")
            print(f"       - Comprehensive analysis of experimental results")
            print(f"       - Key insights, recommendations, and practical implications")
        print(f"   [+] Word Document Features:")
        print(f"       - Professional academic report format")
        print(f"       - All analysis tables in clean, readable format")
        print(f"       - Statistical t-test results with interpretation")
        print(f"       - Performance metrics by graph type")
        print(f"       - Overall summary tables (SSD vs ALT)")
        print(f"       - AI-generated insights and recommendations")
        print(f"       - Experiment summary and participant statistics")

        # Count experts and novices by database (userIds are prefixed
        # 'L1_' / 'L2_' by the query builder to mark their source DB).
        l1_experts = sum(1 for row in data if row['pre_test_binary'] == 'expert' and row['unique_userId'].startswith('L1_'))
        l2_experts = sum(1 for row in data if row['pre_test_binary'] == 'expert' and row['unique_userId'].startswith('L2_'))
        l1_novices = sum(1 for row in data if row['pre_test_binary'] == 'novice' and row['unique_userId'].startswith('L1_'))
        l2_novices = sum(1 for row in data if row['pre_test_binary'] == 'novice' and row['unique_userId'].startswith('L2_'))
        print(f"   1. L1 experts: {l1_experts}")
        print(f"   2. L2 experts: {l2_experts}")
        print(f"   3. L1 novices: {l1_novices}")
        print(f"   4. L2 novices: {l2_novices}")
        print()
        print("[+] The code is now modular and reusable!")
        print("    Analysis modules are in: src/statflow/analysis/")
        print("    - statistical_analysis.py: T-test tables")
        print("    - table_generators.py: Average metrics and summaries")
        print("    - ai_insights.py: AI-powered insights")
    else:
        print("[!] Failed to create Excel file")
        print("    Check the error messages above")
    print("=" * 60)
    print()


if __name__ == "__main__":
    main()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Rucha-Nandgirikar/statflow'

If you have feedback or need assistance with the MCP directory API, please join our Discord server