We provide all the information about MCP servers via our MCP API.
curl -X GET 'https://glama.ai/api/mcp/v1/servers/ravipesala/spark_mcp_optimizer'
If you have feedback or need assistance with the MCP directory API, please join our Discord server.
job_actions_in_loop.py • 1.09 kB
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
def main():
    """
    Run a deliberately inefficient Spark job used as an anti-pattern example.

    BAD: Actions inside loops causing repeated computation.
    This is one of the most common Spark anti-patterns.

    The job builds a one-million-row DataFrame with an ``id`` column and a
    random ``value`` column, then:

    1. calls ``count()`` inside a ten-iteration loop, and
    2. runs three separate actions (count, max, min) on the same DataFrame.

    Both patterns are intentional -- this file exists to demonstrate them --
    so they are NOT optimized away here. The only hardening applied is that
    the Spark session is always stopped, even when one of the jobs fails.
    """
    spark = SparkSession.builder.appName("ActionsInLoopJob").getOrCreate()
    try:
        df = spark.range(1000000).toDF("id")
        # F.rand() is documented as uniform in [0.0, 1.0), so "value" falls
        # in [0, 100).
        df = df.withColumn("value", F.rand() * 100)

        results = []

        # BAD: Action (count) inside loop
        # Each iteration triggers a full job execution
        for i in range(10):
            filtered = df.filter(F.col("value") > i * 10)
            count = filtered.count()  # ACTION - triggers full execution each time
            results.append(count)
            print(f"Iteration {i}: {count}")

        # BAD: Multiple actions on same DataFrame
        total = df.count()  # Full scan
        max_val = df.agg(F.max("value")).collect()[0][0]  # Another full scan
        min_val = df.agg(F.min("value")).collect()[0][0]  # Yet another full scan
        print(f"Total: {total}, Max: {max_val}, Min: {min_val}")
    finally:
        # Release the session even if one of the actions above raised.
        spark.stop()
# Run the job only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()