Spark AQE Settings 2023
# https://docs.databricks.com/en/optimizations/aqe.html#configuration
# disable
# spark.databricks.optimizer.adaptive.enabled false
# default
spark.databricks.optimizer.adaptive.enabled true
# Default number of partitions to use when shuffling data for joins or aggregations.
# For small files, set a lower shuffle partition count.
# default
# spark.sql.shuffle.partitions 200
# auto enables auto-optimized shuffle
spark.sql.shuffle.partitions auto
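# Illustrative value (not from the original gist): for a small dataset you might instead
# pin a low fixed partition count, e.g.
# spark.sql.shuffle.partitions 64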
# Maximum size of the smaller join side at which AQE switches a sort-merge join
# to a broadcast hash join without a hint.
# Increase this for larger 'smaller' tables when workers have more memory,
# as the table is broadcast to each worker for performance.
# default
spark.databricks.adaptive.autoBroadcastJoinThreshold 30MB
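# For comparison (not in the original gist): open-source Spark exposes a similar AQE-time
# threshold, spark.sql.adaptive.autoBroadcastJoinThreshold, which falls back to
# spark.sql.autoBroadcastJoinThreshold (OSS default 10MB) when unset.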
# Enable or disable partition coalescing.
# Merges shuffle partitions into fewer partitions to reduce overhead when shuffling large datasets.
# Behind the scenes this behaves like repartition() or coalesce(), which can also be used to force a partition count manually.
# default
spark.sql.adaptive.coalescePartitions.enabled true
# The target size after coalescing.
# The coalesced partition sizes will be close to but no bigger than this target size.
# default
spark.sql.adaptive.advisoryPartitionSizeInBytes 64MB
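# Illustrative arithmetic (not from the original gist): at this target size, a ~1 GB
# shuffle stage would be coalesced to roughly 1 GB / 64 MB = 16 partitions.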
# The minimum size of partitions after coalescing.
# The coalesced partition sizes will be no smaller than this size.
# default
spark.sql.adaptive.coalescePartitions.minPartitionSize 1MB
# The minimum number of partitions after coalescing.
# Setting this explicitly is not recommended, because it overrides spark.sql.adaptive.coalescePartitions.minPartitionSize.
# default: 2x the number of cluster cores
# spark.sql.adaptive.coalescePartitions.minPartitionNum
# Dynamic skew join handling
# disable
# spark.sql.adaptive.skewJoin.enabled false
# default
spark.sql.adaptive.skewJoin.enabled true
# A partition is considered skewed if its size exceeds this factor multiplied by the median partition size
# and also exceeds skewedPartitionThresholdInBytes.
# default
spark.sql.adaptive.skewJoin.skewedPartitionFactor 5
# A partition is considered skewed if its size in bytes exceeds this threshold
# and also exceeds skewedPartitionFactor multiplied by the median partition size.
# default
spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes 256MB
# Dynamically detect and propagate empty relations
spark.databricks.adaptive.emptyRelationPropagation.enabled true
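As a minimal usage sketch (not part of the original gist), the same settings can also be applied at runtime from PySpark. The keys and values below simply mirror the file above; the Databricks-specific keys (spark.databricks.*) and the "auto" shuffle partition value assume a Databricks cluster, and the SparkSession setup assumes a notebook or local session.

from pyspark.sql import SparkSession

# Reuse the active session (e.g. the `spark` handle in a Databricks notebook) or create one.
spark = SparkSession.builder.getOrCreate()

# Apply a subset of the AQE settings from the file above at runtime.
spark.conf.set("spark.databricks.optimizer.adaptive.enabled", "true")
spark.conf.set("spark.sql.shuffle.partitions", "auto")
spark.conf.set("spark.sql.adaptive.coalescePartitions.enabled", "true")
spark.conf.set("spark.sql.adaptive.advisoryPartitionSizeInBytes", "64MB")
spark.conf.set("spark.sql.adaptive.skewJoin.enabled", "true")
spark.conf.set("spark.sql.adaptive.skewJoin.skewedPartitionFactor", "5")
spark.conf.set("spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes", "256MB")

# Confirm a value took effect.
print(spark.conf.get("spark.sql.adaptive.skewJoin.enabled"))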