from pyspark.sql import SparkSession
import pyspark.sql.functions as F
def main() -> None:
    """Run a small PySpark job designed to create GC pressure.

    Builds a SparkSession with a deliberately small executor heap (512m)
    and a reduced unified-memory fraction (0.3), then layers many
    column transformations onto a 500k-row DataFrame so that execution
    allocates a large number of short-lived JVM objects.

    Side effects: starts (or reuses) a SparkSession, prints the group
    count to stdout, and stops the session on exit.
    """
    spark = (
        SparkSession.builder
        .appName("GCPressureJob")
        # Small heap + low memory fraction intentionally stress the GC.
        .config("spark.executor.memory", "512m")
        .config("spark.memory.fraction", "0.3")
        .getOrCreate()
    )
    # Ensure the session is always stopped, even if a transformation
    # or action raises — otherwise the JVM backend is leaked.
    try:
        # Create many objects to cause GC pressure.
        df = spark.range(500000)

        # Multiple transformations creating many intermediate objects.
        # NOTE: chained withColumn calls grow the logical plan per call;
        # that per-iteration plan growth is intentional here (it adds
        # analysis/optimizer pressure), not an accidental inefficiency.
        for i in range(10):
            df = df.withColumn(f"col_{i}", F.rand() * 100)
            df = df.withColumn(
                f"str_{i}",
                F.concat(F.lit("value_"), F.col(f"col_{i}").cast("string")),
            )

        # Collect to force execution: groupBy produces 100 buckets
        # (id % 100), and count() on the result triggers the full job.
        result = df.groupBy(F.col("id") % 100).count()
        print(f"Count: {result.count()}")
    finally:
        spark.stop()
# Script entry point: only run the job when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()