"""Run a batch of Spark example jobs through spark-submit and report failures."""
import os
import subprocess
import sys

# Path to spark-submit in the local Spark installation (machine-specific).
SPARK_SUBMIT = "/Users/user/Documents/bigdata_stack/spark-3.5.3-bin-hadoop3/bin/spark-submit"
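# Portability note (a sketch, not required by this script): the hardcoded path
# could fall back to an environment variable, or to whatever spark-submit is
# on PATH:
#
#   SPARK_SUBMIT = os.environ.get("SPARK_SUBMIT", "spark-submit")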

examples_to_run = [
    "examples/ex_partitioning.py",
    "examples/ex_resource_overhead.py",
    "examples/ex_skew_join.py",
    "examples/ex_spill_memory.py",
    "examples/job_broadcast_misuse.py",
    "examples/job_cartesian.py",
    "examples/job_gc_pressure.py",
    "examples/job_missing_predicate.py",
    "examples/job_skew.py",
    "examples/job_small_files.py",
    "examples/job_spill.py",
    "examples/fixed/job_skew_fixed.py",
    "examples/fixed/job_spill_fixed.py",
]
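
# A possible alternative (sketch only, not used below): discover the example
# scripts with glob instead of maintaining this list by hand, assuming every
# .py file under examples/ is meant to be run:
#
#   import glob
#   examples_to_run = sorted(glob.glob("examples/**/*.py", recursive=True))
#
# The explicit list is kept here so the run order stays deterministic.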

def run_example(example_path):
    """Run one example via spark-submit; return True on success."""
    print(f"Running {example_path}...")
    try:
        # Run the job synchronously and capture stdout/stderr as text.
        subprocess.run(
            [SPARK_SUBMIT, example_path],
            capture_output=True,
            text=True,
            check=True,  # raise CalledProcessError on a non-zero exit code
        )
        print(f"✅ {example_path} completed successfully.")
        return True
    except FileNotFoundError:
        # Raised if the spark-submit binary itself is missing.
        print(f"❌ spark-submit not found at {SPARK_SUBMIT}")
        return False
    except subprocess.CalledProcessError as e:
        print(f"❌ {example_path} failed.")
        print(f"Error output:\n{e.stderr}")
        return False
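
# If an example can hang (e.g. a runaway cartesian join), subprocess.run also
# accepts a timeout in seconds; a sketch of the guarded call, with 600 as an
# assumed per-job budget:
#
#   subprocess.run([SPARK_SUBMIT, example_path],
#                  capture_output=True, text=True, check=True, timeout=600)
#
# subprocess.TimeoutExpired would then need to be caught alongside
# CalledProcessError. Omitted above to keep the runner minimal.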

def main():
    print("=== Running Remaining Examples ===")
    failed = []
    for ex in examples_to_run:
        full_path = os.path.abspath(ex)
        if not os.path.exists(full_path):
            print(f"⚠️ File not found: {full_path}")
            failed.append(ex)
            continue
        if not run_example(full_path):
            failed.append(ex)

    print("\n=== Summary ===")
    if failed:
        print(f"❌ {len(failed)} examples failed:")
        for f in failed:
            print(f" - {f}")
        sys.exit(1)
    else:
        print("✅ All examples ran successfully!")

if __name__ == "__main__":
    main()