ex_partitioning.py
from pyspark.sql import SparkSession
from pyspark.sql.functions import col


def main():
    # 4. Suboptimal Partitioning
    spark = SparkSession.builder \
        .appName("Ex_Partitioning") \
        .getOrCreate()

    df = spark.range(0, 1000000)

    print("Triggering Small Partitions...")
    # 1M bigint rows is ~8MB. Split across 200 partitions, that is ~40KB
    # per partition, far below the ~64MB target.
    # Should trigger "Suboptimal Partitioning" (Small).
    df.repartition(200).groupBy(col("id") % 10).count().collect()

    print("Triggering Large Partitions...")
    # Coalescing 1M rows to a single partition still yields only ~8MB, so the
    # "Large" case is hard to trigger on a local machine without generating
    # GBs of data, but it does demonstrate manual control over partitioning.
    df.coalesce(1).write.format("noop").mode("overwrite").save()
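    # Hypothetical sanity check (not in the original script): unlike
    # repartition(), coalesce() merges existing partitions without a full
    # shuffle, so exactly one partition remains here.
    # assert df.coalesce(1).rdd.getNumPartitions() == 1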
    spark.stop()


if __name__ == "__main__":
    main()
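As a rough guide, the partition count implied by the ~64MB target above can be derived from an estimated dataset size. The snippet below is a minimal, standalone sketch; suggest_num_partitions and TARGET_PARTITION_BYTES are illustrative names, not part of ex_partitioning.py, and the ~8 bytes-per-row estimate follows the comment in the script:

import math

TARGET_PARTITION_BYTES = 64 * 1024 * 1024  # ~64MB target cited above


def suggest_num_partitions(estimated_bytes: int) -> int:
    # Round up so no partition exceeds the target size.
    return max(1, math.ceil(estimated_bytes / TARGET_PARTITION_BYTES))


# 1M bigint rows at ~8 bytes each is ~8MB, so one partition suffices,
# versus the 200 partitions forced by repartition(200) above.
print(suggest_num_partitions(1000000 * 8))  # -> 1

Passing a count computed this way to repartition() keeps partitions near the target size instead of the ~40KB slivers the script deliberately creates.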