We provide all the information about MCP servers via our MCP API.
curl -X GET 'https://glama.ai/api/mcp/v1/servers/ravipesala/spark_mcp_optimizer'
If you have feedback or need assistance with the MCP directory API, please join our Discord server
job_caching_misuse.py•1.03 kB
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
def main():
"""
BAD: Caching without unpersist, causing memory leaks.
This job repeatedly caches DataFrames without releasing them.
"""
spark = SparkSession.builder \
.appName("CachingMisuseJob") \
.config("spark.executor.memory", "1g") \
.getOrCreate()
# Create base DataFrame
df = spark.range(100000).toDF("id")
# BAD: Cache multiple times without unpersist
for i in range(10):
# Each iteration creates a new cached DataFrame
temp_df = df.withColumn(f"col_{i}", F.rand() * 100)
temp_df.cache() # Memory leak - never unpersisted
temp_df.count() # Force caching
# temp_df goes out of scope but remains cached
# BAD: Unnecessary caching of small DataFrames
small_df = spark.range(10).toDF("id")
small_df.cache() # Overhead > benefit for tiny data
small_df.show()
spark.stop()
if __name__ == "__main__":
main()