From 2ffabc6f3a3d2d154f2a7441240a17fb371735fe Mon Sep 17 00:00:00 2001
From: "Ahmed Hussein (amahussein)"
Date: Tue, 28 Jan 2025 11:47:12 -0600
Subject: [PATCH 1/7] Generate a separate file to list bootstrap properties
Signed-off-by: Ahmed Hussein (amahussein)
Fixes #1509
- Add a new report to generate appId-bootstrap.conf
- Add more context to the AutoTuner results
- Add a YAML file to initialize the list of bootstrap entries
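
For illustration, the generated appId-bootstrap.conf holds one "--conf key=value"
line per entry, the format produced by TuningEntryTrait.toConfString (the values
below are hypothetical):

    --conf spark.executor.cores=16
    --conf spark.rapids.sql.enabled=true
    --conf spark.sql.shuffle.partitions=200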
---
.../main/resources/bootstrap/tuningTable.yaml | 152 +++++++++++++++
.../rapids/tool/profiling/Profiler.scala | 8 +-
.../spark/rapids/tool/tuning/AutoTuner.scala | 135 +++----------
.../rapids/tool/tuning/BootstrapReport.scala | 49 +++++
.../tuning/QualificationAutoTunerRunner.scala | 7 +-
.../rapids/tool/tuning/TunerContext.scala | 4 +-
.../rapids/tool/tuning/TuningEntry.scala | 70 +++++++
.../tool/tuning/TuningEntryDefinition.scala | 90 +++++++++
.../rapids/tool/tuning/TuningEntryTrait.scala | 183 ++++++++++++++++++
.../rapids/tool/tuning/TuningOpTypes.scala | 36 ++++
.../tool/tuning/ProfilingAutoTunerSuite.scala | 16 +-
11 files changed, 637 insertions(+), 113 deletions(-)
create mode 100644 core/src/main/resources/bootstrap/tuningTable.yaml
create mode 100644 core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/BootstrapReport.scala
create mode 100644 core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntry.scala
create mode 100644 core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryDefinition.scala
create mode 100644 core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala
create mode 100644 core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningOpTypes.scala
diff --git a/core/src/main/resources/bootstrap/tuningTable.yaml b/core/src/main/resources/bootstrap/tuningTable.yaml
new file mode 100644
index 000000000..a5c1e443f
--- /dev/null
+++ b/core/src/main/resources/bootstrap/tuningTable.yaml
@@ -0,0 +1,152 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+tuningDefinitions:
+ - label: spark.executor.cores
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.executor.instances
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.executor.memory
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.executor.memoryOverhead
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.executor.memoryOverheadFactor
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.kubernetes.memoryOverheadFactor
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.rapids.filecache.enabled
+ description: ''
+ enabled: true
+ level: job
+ - label: spark.rapids.memory.pinnedPool.size
+ description: ''
+ enabled: true
+ level: cluster
+ - label: spark.rapids.shuffle.multiThreaded.maxBytesInFlight
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.rapids.shuffle.multiThreaded.reader.threads
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.rapids.shuffle.multiThreaded.writer.threads
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.rapids.sql.batchSizeBytes
+ description: ''
+ enabled: true
+ level: job
+ category: tuning
+ - label: spark.rapids.sql.concurrentGpuTasks
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.rapids.sql.format.parquet.multithreaded.combine.waitTime
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.rapids.sql.enabled
+ description: 'should be true to enable SQL operations on the GPU.'
+ enabled: true
+ level: cluster
+ category: functionality
+ - label: spark.rapids.sql.multiThreadedRead.numThreads
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.rapids.sql.reader.multithreaded.combine.sizeBytes
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
+ - label: spark.shuffle.manager
+ description: ''
+ enabled: true
+ level: cluster
+ - label: spark.sql.adaptive.enabled
+ description: ''
+ enabled: true
+ level: job
+ category: tuning
+ - label: spark.sql.adaptive.advisoryPartitionSizeInBytes
+ description: ''
+ enabled: true
+ level: job
+ category: tuning
+ - label: spark.sql.adaptive.coalescePartitions.initialPartitionNum
+ description: ''
+ enabled: true
+ level: job
+ category: tuning
+ - label: spark.sql.adaptive.coalescePartitions.minPartitionNum
+ description: ''
+ enabled: true
+ level: job
+ category: tuning
+ - label: spark.sql.adaptive.coalescePartitions.minPartitionSize
+ description: ''
+ enabled: true
+ level: job
+ category: tuning
+ - label: spark.sql.adaptive.coalescePartitions.parallelismFirst
+ description: ''
+ enabled: true
+ level: job
+ category: tuning
+ - label: spark.sql.adaptive.autoBroadcastJoinThreshold
+ description: ''
+ enabled: true
+ level: job
+ category: tuning
+ - label: spark.sql.files.maxPartitionBytes
+ description: ''
+ enabled: true
+ level: job
+ category: tuning
+ - label: spark.sql.shuffle.partitions
+ description: ''
+ enabled: true
+ level: job
+ category: tuning
+ - label: spark.task.resource.gpu.amount
+ description: ''
+ enabled: true
+ level: cluster
+ category: tuning
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
index 2a7069296..620509804 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
@@ -23,7 +23,7 @@ import scala.collection.mutable.{ArrayBuffer, HashMap}
import scala.util.control.NonFatal
import com.nvidia.spark.rapids.tool.{AppSummaryInfoBaseProvider, EventLogInfo, EventLogPathProcessor, FailedEventLog, Platform, PlatformFactory, ToolBase}
-import com.nvidia.spark.rapids.tool.tuning.{AutoTuner, ProfilingAutoTunerConfigsProvider}
+import com.nvidia.spark.rapids.tool.tuning.{AutoTuner, ProfilingAutoTunerConfigsProvider, TuningEntryTrait}
import com.nvidia.spark.rapids.tool.views._
import org.apache.hadoop.conf.Configuration
@@ -416,7 +416,7 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolea
*/
private def runAutoTuner(appInfo: Option[ApplicationSummaryInfo],
driverInfoProvider: DriverLogInfoProvider = BaseDriverLogInfoProvider.noneDriverLog)
- : (Seq[RecommendedPropertyResult], Seq[RecommendedCommentResult]) = {
+ : (Seq[TuningEntryTrait], Seq[RecommendedCommentResult]) = {
// only run the auto tuner on GPU event logs for profiling tool right now. There are
// assumptions made in the code
if (appInfo.isDefined && appInfo.get.appInfo.head.pluginEnabled) {
@@ -611,10 +611,10 @@ object Profiler {
val COMBINED_LOG_FILE_NAME_PREFIX = "rapids_4_spark_tools_combined"
val SUBDIR = "rapids_4_spark_profile"
- def getAutoTunerResultsAsString(props: Seq[RecommendedPropertyResult],
+ def getAutoTunerResultsAsString(props: Seq[TuningEntryTrait],
comments: Seq[RecommendedCommentResult]): String = {
val propStr = if (props.nonEmpty) {
- val propertiesToStr = props.map(_.toString).reduce(_ + "\n" + _)
+ val propertiesToStr = props.map(_.toConfString).reduce(_ + "\n" + _)
s"\nSpark Properties:\n$propertiesToStr\n"
} else {
"Cannot recommend properties. See Comments.\n"
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
index 76232e38c..748f3a1d7 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
@@ -194,74 +194,6 @@ class ClusterProperties(
s"{${system.toString}, ${gpu.toString}, $softwareProperties}"
}
-/**
- * Wrapper to hold the recommendation of a given criterion.
- *
- * @param name the property label.
- * @param original the value loaded from the spark properties.
- * @param recommended the recommended value by the AutoTuner.
- */
-class RecommendationEntry(val name: String,
- val original: Option[String],
- var recommended: Option[String]) {
-
- def setRecommendedValue(value: String): Unit = {
- recommended = Option(value)
- }
-
- /**
- * Used to compare between two properties by converting memory units to
- * a equivalent representations.
- * @param propValue property to be processed.
- * @return the uniform representation of property.
- * For Memory, the value is converted to bytes.
- */
- private def getRawValue(propValue: Option[String]): Option[String] = {
- propValue match {
- case None => None
- case Some(value) =>
- if (StringUtils.isMemorySize(value)) {
- // if it is memory return the bytes unit
- Some(s"${StringUtils.convertMemorySizeToBytes(value)}")
- } else {
- propValue
- }
- }
- }
-
- /**
- * Returns true when the recommendation is different than the original.
- */
- private def recommendsNewValue(): Boolean = {
- val originalVal = getRawValue(original)
- val recommendedVal = getRawValue(recommended)
- (originalVal, recommendedVal) match {
- case (None, None) => false
- case (Some(orig), Some(rec)) =>
- orig != rec
- case _ => true
- }
- }
-
- /**
- * True or False whether the recommendation is valid. e.g., recommendations that does not change
- * the original value returns false if filter is enabled.
- * @param filterByUpdated flag to pick only the properties that would be updated by the
- * recommendations
- */
- def isValid(filterByUpdated: Boolean): Boolean = {
- recommended match {
- case None => false
- case _ =>
- if (filterByUpdated) { // filter enabled
- recommendsNewValue()
- } else {
- true
- }
- }
- }
-}
-
/**
* Represents different Spark master types.
*/
@@ -358,8 +290,8 @@ class AutoTuner(
extends Logging {
var comments = new mutable.ListBuffer[String]()
- var recommendations: mutable.LinkedHashMap[String, RecommendationEntry] =
- mutable.LinkedHashMap[String, RecommendationEntry]()
+ var recommendations: mutable.LinkedHashMap[String, TuningEntryTrait] =
+ mutable.LinkedHashMap[String, TuningEntryTrait]()
// list of recommendations to be skipped for recommendations
// Note that the recommendations will be computed anyway to avoid breaking dependencies.
private val skippedRecommendations: mutable.HashSet[String] = mutable.HashSet[String]()
@@ -392,7 +324,7 @@ class AutoTuner(
autoTunerConfigsProvider.recommendationsTarget.foreach { key =>
// no need to add new records if they are missing from props
getPropertyValue(key).foreach { propVal =>
- val recommendationVal = new RecommendationEntry(key, Option(propVal), None)
+ val recommendationVal = TuningEntry.build(key, Option(propVal), None)
recommendations(key) = recommendationVal
}
}
@@ -401,10 +333,10 @@ class AutoTuner(
def appendRecommendation(key: String, value: String): Unit = {
if (!skippedRecommendations.contains(key)) {
val recomRecord = recommendations.getOrElseUpdate(key,
- new RecommendationEntry(key, getPropertyValue(key), None))
+ TuningEntry.build(key, getPropertyValue(key), None))
if (value != null) {
recomRecord.setRecommendedValue(value)
- if (recomRecord.original.isEmpty) {
+ if (recomRecord.originalValue.isEmpty) {
// add a comment that the value was missing in the cluster properties
appendComment(s"'$key' was not set.")
}
@@ -1063,13 +995,14 @@ class AutoTuner(
case Some(f) => f.contains("com.nvidia.spark.SQLPlugin")
case None => false
}
- val rapidsEnabled = getPropertyValue("spark.rapids.sql.enabled") match {
- case Some(f) => f.toBoolean
- case None => true
- }
- if (!rapidsEnabled) {
- appendRecommendation("spark.rapids.sql.enabled", "true")
- }
+// val rapidsEnabled = getPropertyValue("spark.rapids.sql.enabled") match {
+// case Some(f) => f.toBoolean
+// case None => true
+// }
+// if (!rapidsEnabled) {
+//
+// }
+ appendRecommendation("spark.rapids.sql.enabled", "true")
if (!isPluginLoaded) {
appendComment("RAPIDS Accelerator for Apache Spark jar is missing in \"spark.plugins\". " +
"Please refer to " +
@@ -1119,18 +1052,23 @@ class AutoTuner(
comments.map(RecommendedCommentResult).sortBy(_.comment)
}
- private def toRecommendationsProfileResult: Seq[RecommendedPropertyResult] = {
- val finalRecommendations =
- recommendations.filter(elem => elem._2.isValid(filterByUpdatedPropertiesEnabled))
- finalRecommendations.collect {
- case (key, record) => RecommendedPropertyResult(key, record.recommended.get)
- }.toSeq.sortBy(_.property)
+ private def toRecommendationsProfileResult: Seq[TuningEntryTrait] = {
+ val recommendationEntries = if (filterByUpdatedPropertiesEnabled) {
+ recommendations.values.filter(_.isTuned())
+ } else {
+ recommendations.values.filter(_.isEnabled())
+ }
+ recommendationEntries.toSeq.sortBy(_.name)
+ }
+
+ protected def finalizeTuning(): Unit = {
+ recommendations.values.foreach(_.commit())
}
/**
* The Autotuner loads the spark properties from either the ClusterProperties or the eventlog.
- * 1- runs the calculation for each criterion and saves it as a [[RecommendationEntry]].
- * 2- The final list of recommendations include any [[RecommendationEntry]] that has a
+ * 1- runs the calculation for each criterion and saves it as a [[TuningEntryTrait]].
+ * 2- The final list of recommendations includes any [[TuningEntryTrait]] that has a
* recommendation that is different from the original property.
* 3- Null values are excluded.
* 4- A comment is added for each missing property in the spark property.
@@ -1149,7 +1087,7 @@ class AutoTuner(
skipList: Option[Seq[String]] = Some(Seq()),
limitedLogicList: Option[Seq[String]] = Some(Seq()),
showOnlyUpdatedProps: Boolean = true):
- (Seq[RecommendedPropertyResult], Seq[RecommendedCommentResult]) = {
+ (Seq[TuningEntryTrait], Seq[RecommendedCommentResult]) = {
if (appInfoProvider.isAppInfoAvailable) {
limitedLogicList.foreach(limitedSeq => limitedLogicRecommendations ++= limitedSeq)
skipList.foreach(skipSeq => skippedRecommendations ++= skipSeq)
@@ -1178,6 +1116,7 @@ class AutoTuner(
}
}
recommendFromDriverLogs()
+ finalizeTuning()
(toRecommendationsProfileResult, toCommentProfileResult)
}
@@ -1192,13 +1131,13 @@ class AutoTuner(
// Combines the original Spark properties with the recommended ones.
def combineSparkProperties(
- recommendedSet: Seq[RecommendedPropertyResult]): Seq[RecommendedPropertyResult] = {
+ recommendedSet: Seq[TuningEntryTrait]): Seq[RecommendedPropertyResult] = {
// get the original properties after filtering the and removing unnecessary keys
val originalPropsFiltered = processPropKeys(getAllProperties)
// Combine the original properties with the recommended properties.
// The recommendations should always override the original ones
val combinedProps = (originalPropsFiltered
- ++ recommendedSet.map(r => r.property -> r.value).toMap).toSeq.sortBy(_._1)
+ ++ recommendedSet.map(r => r.name -> r.getTuneValue()).toMap).toSeq.sortBy(_._1)
combinedProps.collect {
case (pK, pV) => RecommendedPropertyResult(pK, pV)
}
@@ -1282,19 +1221,7 @@ trait AutoTunerConfigsProvider extends Logging {
"'spark.sql.adaptive.enabled' should be enabled for better performance."
) ++ commentsForMissingMemoryProps
- val recommendationsTarget: Seq[String] = Seq[String](
- "spark.executor.instances",
- "spark.rapids.sql.enabled",
- "spark.executor.cores",
- "spark.executor.memory",
- "spark.rapids.sql.concurrentGpuTasks",
- "spark.task.resource.gpu.amount",
- "spark.sql.shuffle.partitions",
- "spark.sql.files.maxPartitionBytes",
- "spark.rapids.memory.pinnedPool.size",
- "spark.executor.memoryOverhead",
- "spark.executor.memoryOverheadFactor",
- "spark.kubernetes.memoryOverheadFactor")
+ lazy val recommendationsTarget: Iterable[String] = TuningEntryDefinition.TUNING_TABLE.keys
val classPathComments: Map[String, String] = Map(
"rapids.jars.missing" ->
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/BootstrapReport.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/BootstrapReport.scala
new file mode 100644
index 000000000..9f834abba
--- /dev/null
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/BootstrapReport.scala
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.tool.tuning
+
+import com.nvidia.spark.rapids.tool.ToolTextFileWriter
+import org.apache.hadoop.conf.Configuration
+
+/**
+ * A class that generates the report containing only the required and tuned configurations.
+ * @param tuningResult The result of the tuning process
+ * @param outputDir The directory where the report will be written.
+ * @param hadoopConf The Hadoop configuration
+ */
+class BootstrapReport(tuningResult: TuningResult,
+ outputDir: String, hadoopConf: Configuration) {
+
+ /**
+   * Loads the bootstrap entries from the tuning result, i.e. every enabled bootstrap
+   * entry that has not been removed.
+ * @return the list of bootstrap entries
+ */
+ private def loadBootstrapEntries(): Seq[TuningEntryTrait] = {
+ tuningResult.recommendations.filter(e => e.isEnabled() && e.isBootstrap() && !e.isRemoved())
+ }
+
+  def generateReport(): Unit = {
+ val textFileWriter = new ToolTextFileWriter(outputDir,
+ s"${tuningResult.appID}-bootstrap.conf",
+ s"Required and Tuned configurations to run - ${tuningResult.appID}", Option(hadoopConf))
+ try {
+      // mkString is safe when no entry qualifies (reduce would throw on an empty list)
+      textFileWriter.write(loadBootstrapEntries().map(_.toConfString).mkString("\n"))
+ } finally {
+ textFileWriter.close()
+ }
+ }
+}
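
A minimal sketch of the filter semantics, using hypothetical entries: an unresolved
entry is still emitted with its placeholder value, while a removed entry is dropped
from the bootstrap report.

    val sm = TuningEntry.build("spark.shuffle.manager", None, None)
    sm.markAsUnresolved()
    sm.toConfString     // --conf spark.shuffle.manager=[FILL_IN_VALUE]

    val factor = TuningEntry.build("spark.executor.memoryOverheadFactor", Some("0.2"), None)
    factor.markAsRemoved()
    factor.isRemoved()  // true: excluded by loadBootstrapEntries()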
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerRunner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerRunner.scala
index cd4c74c17..5fad6ae51 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerRunner.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerRunner.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -50,6 +50,9 @@ class QualificationAutoTunerRunner(val appInfoProvider: QualAppSummaryInfoProvid
} finally {
textFileWriter.close()
}
+ // Write down the recommended properties
+ val bootstrapReport = new BootstrapReport(tuningResult, outputDir, hadoopConf)
+ bootstrapReport.generateReport()
// Write down the combined configurations
tuningResult.combinedProps.collect {
case combinedProps =>
@@ -77,7 +80,7 @@ class QualificationAutoTunerRunner(val appInfoProvider: QualAppSummaryInfoProvid
// Otherwise, it is difficult to separate them logically.
val combinedProps = autoTuner.combineSparkProperties(recommendations)
val resultRecord = TuningResult(appInfoProvider.getAppID, recommendations,
- comments, Option(combinedProps))
+ comments, combinedProps = Option(combinedProps))
writeTuningReport(resultRecord, tunerContext.getOutputPath, tunerContext.hadoopConf)
resultRecord
}
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TunerContext.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TunerContext.scala
index bbf7fb9b6..c0f84696d 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TunerContext.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TunerContext.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -29,7 +29,7 @@ import org.apache.spark.sql.rapids.tool.util.RapidsToolsConfUtil
case class TuningResult(
appID: String,
- recommendations: Seq[RecommendedPropertyResult],
+ recommendations: Seq[TuningEntryTrait],
comments: Seq[RecommendedCommentResult],
combinedProps: Option[Seq[RecommendedPropertyResult]] = None)
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntry.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntry.scala
new file mode 100644
index 000000000..086cc50ea
--- /dev/null
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntry.scala
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.tool.tuning
+
+import org.apache.spark.internal.Logging
+
+/**
+ * A wrapper to hold the tuning entry information.
+ * @param name the name of the property
+ * @param originalValue the value from the eventlog
+ * @param tunedValue the value recommended by the AutoTuner
+ * @param definition the definition of the tuning entry.
+ */
+class TuningEntry(
+ override val name: String,
+ override val originalValue: Option[String],
+ override var tunedValue: Option[String],
+ definition: Option[TuningEntryDefinition] = None) extends TuningEntryTrait {
+
+ override def isBootstrap(): Boolean = {
+ definition match {
+ case Some(defn) => defn.isBootstrap()
+ case None => name.startsWith("spark.rapids.")
+ }
+ }
+
+ override def isEnabled(): Boolean = {
+ val globalFlag = definition match {
+ case Some(defn) => defn.isEnabled()
+ case None => true
+ }
+ globalFlag && enabled
+ }
+}
+
+object TuningEntry extends Logging {
+ /**
+   * Builds a TuningEntry object, automatically pulling its definition from the Tuning Entry Table.
+ * @param name the property label
+ * @param originalValue the original value from the eventlog
+ * @param tunedValue the value recommended by the AutoTuner
+ * @return a TuningEntry object
+ */
+ def build(
+ name: String,
+ originalValue: Option[String],
+ tunedValue: Option[String]): TuningEntry = {
+    // pull the definition from the Tuning Entry Table
+ val tuningDefinition = TuningEntryDefinition.TUNING_TABLE.get(name)
+    // for debugging purposes
+ if (tuningDefinition.isEmpty) {
+ logInfo("Tuning Entry is not defined for " + name)
+ }
+ new TuningEntry(name, originalValue, tunedValue, tuningDefinition)
+ }
+}
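
A minimal sketch of the fallback behavior, using a hypothetical property name: when
no definition exists in the table, isBootstrap() falls back to the "spark.rapids."
prefix check.

    val knob = TuningEntry.build("spark.rapids.sql.someKnob", None, Some("true"))
    knob.isBootstrap()  // true via the prefix fallback, even without a YAML entry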
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryDefinition.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryDefinition.scala
new file mode 100644
index 000000000..b6261ccd5
--- /dev/null
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryDefinition.scala
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.tool.tuning
+
+import scala.beans.BeanProperty
+import scala.collection.JavaConverters._
+import scala.collection.breakOut
+
+import org.yaml.snakeyaml.{DumperOptions, LoaderOptions, Yaml}
+import org.yaml.snakeyaml.constructor.Constructor
+import org.yaml.snakeyaml.representer.Representer
+
+import org.apache.spark.sql.rapids.tool.util.UTF8Source
+
+/**
+ * A wrapper to hold the definition of a tuning entry.
+ * @param label the property name
+ * @param description explains the importance of the property and how it is used
+ * @param enabled global flag to enable/disable the tuning entry
+ * @param level groups the tuning entries by scope (job/cluster)
+ * @param category Indicates the purpose of that property for RAPIDS.
+ * "functionality": required to enable RAPIDS
+ * "tuning": required to tune the runtime.
+ * @param bootstrapEntry When true, the property should be added to the bootstrap configuration.
+ * Default is true.
+ */
+class TuningEntryDefinition(
+ @BeanProperty var label: String,
+ @BeanProperty var description: String,
+ @BeanProperty var enabled: Boolean,
+ @BeanProperty var level: String,
+ @BeanProperty var category: String,
+ @BeanProperty var bootstrapEntry: Boolean) {
+ def this() = {
+ this("", "", enabled = true, "", "", bootstrapEntry = true)
+ }
+
+ def isEnabled(): Boolean = {
+ enabled
+ }
+
+  def isBootstrap(): Boolean = {
+ bootstrapEntry || label.startsWith("spark.rapids.")
+ }
+}
+
+class TuningEntries(
+ @BeanProperty var tuningDefinitions: java.util.List[TuningEntryDefinition]) {
+ def this() = {
+ this(new java.util.ArrayList[TuningEntryDefinition]())
+ }
+}
+
+object TuningEntryDefinition {
+ // A static Map between the propertyName and the TuningEntryDefinition
+ lazy val TUNING_TABLE: Map[String, TuningEntryDefinition] = loadTable()
+
+ /**
+ * Load the tuning table from the yaml file.
+ * @return a map between property name and the TuningEntryDefinition
+ */
+ private def loadTable(): Map[String, TuningEntryDefinition] = {
+ val yamlSource =
+ UTF8Source.fromResource("bootstrap/tuningTable.yaml").mkString
+ val representer = new Representer(new DumperOptions())
+ representer.getPropertyUtils.setSkipMissingProperties(true)
+ val constructor = new Constructor(classOf[TuningEntries], new LoaderOptions())
+ val yamlObjNested = new Yaml(constructor, representer)
+ val entryTable: TuningEntries = yamlObjNested.load(yamlSource).asInstanceOf[TuningEntries]
+ // load the enabled entries.
+ entryTable.tuningDefinitions.asScala.collect {
+ case e if e.isEnabled() => (e.label, e)
+ }(breakOut)
+ }
+}
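
A minimal sketch of a table lookup (hypothetical usage); the table only holds
entries whose enabled flag is true, and bootstrapEntry defaults to true when the
YAML omits it:

    val defn = TuningEntryDefinition.TUNING_TABLE.get("spark.executor.cores")
    defn.foreach(d => assert(d.isBootstrap()))  // bootstrapEntry defaulted to true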
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala
new file mode 100644
index 000000000..123b70a87
--- /dev/null
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.tool.tuning
+
+import scala.collection.mutable.ListBuffer
+
+import com.nvidia.spark.rapids.tool.tuning.TuningOpTypes.TuningOpType
+
+import org.apache.spark.sql.rapids.tool.util.StringUtils
+
+/**
+ * A trait that defines the behavior of the Tuning Entry.
+ */
+trait TuningEntryTrait {
+ val name: String
+ // The value recommended by the AutoTuner
+ var tunedValue: Option[String]
+ // The original value of the property from the event log
+ val originalValue: Option[String]
+ var enabled: Boolean = true
+
+ // The type of tuning operation to be performed
+ var tuningOpType: TuningOpType = TuningOpTypes.UNKNOWN
+ // Comments specific to the property
+ val comments: ListBuffer[String] = ListBuffer[String]()
+ // The value to be used when the property is unresolved.
+ var fillUnresolved: Option[String] = Some("[FILL_IN_VALUE]")
+
+ def toConfString: String = {
+ "--conf %s=%s".format(name, getTuneValue())
+ }
+
+ /**
+   * Marks the property as unresolved by the AutoTuner. This distinguishes properties that
+   * could not be tuned due to a failure in the AutoTuner. For example, when the
+   * shuffleManager className cannot be found, we still want to include the property in the
+   * bootstrap configuration.
+ * @param fill the value to be used to fill in the gap if necessary.
+ */
+ def markAsUnresolved(fill: Option[String] = None): Unit = {
+ setTuningOpType(TuningOpTypes.UNRESOLVED)
+ if (fill.isDefined) {
+ fillUnresolved = fill
+ }
+ }
+
+ /**
+   * Disables the property. This is used to turn off the property if it is not applicable.
+ */
+ def disable(): Unit = {
+ enabled = false
+ }
+
+ /**
+ * Returns the value of the property as a string.
+   * When the tunedValue is not set, it is filled in as [FILL_IN_VALUE] so that it can be
+   * replaced by the user. This is used because the AutoTuner may not be able to successfully
+   * make a recommendation. Yet, we still want to include the property in the final report.
+ * Note that the name is not "getTunedValue" because the purpose is different.
+ *
+ * @param fillIfBlank the value of the content if the TunedValue is empty
+ * @return the value of the property as a string.
+ */
+ def getTuneValue(fillIfBlank: Option[String] = None): String = {
+ if (isUnresolved()) {
+ fillIfBlank.getOrElse(fillUnresolved.get)
+ } else {
+      // It is possible that the property was not tuned. However, we should not hit that case
+      // because commit() copies the tuned value from the original.
+ tunedValue.getOrElse(fillIfBlank.getOrElse(originalValue.getOrElse("[UNDEFINED]")))
+ }
+ }
+
+ /**
+ * Indicates that a specific configuration should be removed from the configuration.
+ */
+ def markAsRemoved(): Unit = {
+ tunedValue = None
+ setTuningOpType(TuningOpTypes.REMOVE)
+ }
+
+ def setRecommendedValue(value: String): Unit = {
+ tunedValue = Option(value)
+ updateOpType()
+ }
+
+ /**
+ * Indicates if the property is tuned. This is used to filter out the entries that stayed the
+ * same.
+ * @return true if it was changed by the AutoTuner or false otherwise.
+ */
+ def isTuned(): Boolean = {
+ isEnabled() && TuningOpTypes.isTuned(tuningOpType)
+ }
+
+ def isUnresolved(): Boolean = {
+ tuningOpType == TuningOpTypes.UNRESOLVED
+ }
+
+ /**
+ * Indicates if the property is removed by the AutoTuner
+ */
+ def isRemoved(): Boolean = {
+ tuningOpType == TuningOpTypes.REMOVE
+ }
+
+ /**
+ * Indicates if the property is a bootstrap property.
+   * A bootstrap property is a property that is required to be set by the AutoTuner.
+ */
+ def isBootstrap(): Boolean
+
+ /**
+ * Indicates if the property is enabled.
+ */
+ def isEnabled(): Boolean
+
+ /**
+   * Used to compare two properties by converting memory units to an equivalent
+   * representation.
+   * @param propValue property to be processed.
+   * @return the uniform representation of the property.
+   *         For memory, the value is converted to bytes.
+ */
+ private def getRawValue(propValue: Option[String]): Option[String] = {
+ propValue match {
+ case None => None
+ case Some(value) =>
+ if (StringUtils.isMemorySize(value)) {
+ // if it is memory return the bytes unit
+ Some(s"${StringUtils.convertMemorySizeToBytes(value)}")
+ } else {
+ propValue
+ }
+ }
+ }
+
+ def setTuningOpType(opType: TuningOpType): Unit = {
+ tuningOpType = opType
+ }
+
+ /**
+ * Updates the tuning operation type based on the original and tuned values.
+ */
+ def updateOpType(): Unit = {
+ if (!(isRemoved() || isUnresolved())) {
+ val originalVal = getRawValue(originalValue)
+ val recommendedVal = getRawValue(tunedValue)
+ (originalVal, recommendedVal) match {
+ case (None, None) => setTuningOpType(TuningOpTypes.UNKNOWN)
+ case (Some(orig), Some(rec)) =>
+ if (orig != rec) {
+ setTuningOpType(TuningOpTypes.UPDATE)
+ } else {
+ setTuningOpType(TuningOpTypes.CLONE)
+ }
+ case (None, Some(_)) => setTuningOpType(TuningOpTypes.ADD)
+ case (Some(orig), None) =>
+          // If the property was not set by the AutoTuner, the value should be copied
+          // from the original configuration.
+ setRecommendedValue(orig)
+ }
+ }
+ }
+
+ def commit(): Unit = {
+ updateOpType()
+ }
+}
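
A minimal sketch of the entry lifecycle (hypothetical values):

    val mem = TuningEntry.build("spark.executor.memory", Some("16g"), Some("32g"))
    mem.commit()      // 16g vs 32g, compared in bytes => UPDATE
    mem.isTuned()     // true
    mem.toConfString  // --conf spark.executor.memory=32g

    val cores = TuningEntry.build("spark.executor.cores", Some("8"), None)
    cores.commit()    // tunedValue copied from the original => CLONE
    cores.isTuned()   // false: filtered out when showOnlyUpdatedProps is enabled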
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningOpTypes.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningOpTypes.scala
new file mode 100644
index 000000000..9b81729e4
--- /dev/null
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningOpTypes.scala
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.tool.tuning
+
+/**
+ * Enumerated type to define the different modifications that the AutoTuner performs on
+ * a specific property.
+ */
+object TuningOpTypes extends Enumeration {
+ type TuningOpType = Value
+ val ADD, // the property is added
+ REMOVE, // the property is removed
+ UPDATE, // the property is updated
+ CLONE, // the property is the same
+ UNRESOLVED, // the property is processed by the AutoTuner but the value is not resolved
+ UNKNOWN = Value
+
+ def isTuned(tuningOpType: TuningOpType): Boolean = {
+ tuningOpType == ADD || tuningOpType == UPDATE ||
+ tuningOpType == REMOVE || tuningOpType == UNRESOLVED
+ }
+}
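
For reference, only CLONE and UNKNOWN are excluded by the isTuned predicate:

    TuningOpTypes.isTuned(TuningOpTypes.UPDATE)  // true
    TuningOpTypes.isTuned(TuningOpTypes.CLONE)   // false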
diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
index b67a873e8..a5ddab4b5 100644
--- a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
+++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
@@ -1138,6 +1138,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
|--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
@@ -1152,6 +1153,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.shuffle.multiThreaded.reader.threads' was not set.
|- 'spark.rapids.shuffle.multiThreaded.writer.threads' was not set.
|- 'spark.rapids.sql.batchSizeBytes' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
|- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
|- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
@@ -1782,7 +1784,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
val (properties, comments) = autoTuner.getRecommendedProperties()
// Assert recommendations are excluded in properties
- assert(properties.map(_.property).forall(autoTuner.platform.isValidRecommendation))
+ assert(properties.map(_.name).forall(autoTuner.platform.isValidRecommendation))
// Assert recommendations are skipped in comments
assert(comments.map(_.comment).forall(autoTuner.platform.isValidComment))
}
@@ -2580,6 +2582,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
|--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
@@ -2591,6 +2594,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.executor.memoryOverhead' was not set.
|- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
|- 'spark.rapids.sql.batchSizeBytes' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
|- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
|- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
@@ -2656,6 +2660,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
|--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
@@ -2667,6 +2672,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.executor.memoryOverhead' was not set.
|- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
|- 'spark.rapids.sql.batchSizeBytes' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
|- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
|- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
@@ -2721,6 +2727,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
|--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
@@ -2740,6 +2747,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.shuffle.multiThreaded.writer.threads' was not set.
|- 'spark.rapids.sql.batchSizeBytes' was not set.
|- 'spark.rapids.sql.concurrentGpuTasks' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
|- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
|- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
@@ -2796,6 +2804,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
|--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
@@ -2814,6 +2823,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.shuffle.multiThreaded.writer.threads' was not set.
|- 'spark.rapids.sql.batchSizeBytes' was not set.
|- 'spark.rapids.sql.concurrentGpuTasks' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
|- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
|- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
@@ -2870,6 +2880,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
|--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
@@ -2888,6 +2899,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.shuffle.multiThreaded.writer.threads' was not set.
|- 'spark.rapids.sql.batchSizeBytes' was not set.
|- 'spark.rapids.sql.concurrentGpuTasks' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
|- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
|- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
@@ -2943,6 +2955,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
|--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
@@ -2961,6 +2974,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.shuffle.multiThreaded.writer.threads' was not set.
|- 'spark.rapids.sql.batchSizeBytes' was not set.
|- 'spark.rapids.sql.concurrentGpuTasks' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
|- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
|- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
From 53e9739a6714da37aadcc33fd4105c34e7707b95 Mon Sep 17 00:00:00 2001
From: "Ahmed Hussein (amahussein)"
Date: Tue, 28 Jan 2025 17:22:51 -0600
Subject: [PATCH 2/7] cleanup commented code
Signed-off-by: Ahmed Hussein (amahussein)
---
.../com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
index 748f3a1d7..6da0cf512 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
@@ -995,13 +995,7 @@ class AutoTuner(
case Some(f) => f.contains("com.nvidia.spark.SQLPlugin")
case None => false
}
-// val rapidsEnabled = getPropertyValue("spark.rapids.sql.enabled") match {
-// case Some(f) => f.toBoolean
-// case None => true
-// }
-// if (!rapidsEnabled) {
-//
-// }
+  // Recommend enabling the RAPIDS SQL plugin without needing to check whether it is set.
appendRecommendation("spark.rapids.sql.enabled", "true")
if (!isPluginLoaded) {
appendComment("RAPIDS Accelerator for Apache Spark jar is missing in \"spark.plugins\". " +
From 052ab0634675afa36b66be37d66c0459c5baafea Mon Sep 17 00:00:00 2001
From: Partho Sarthi
Date: Tue, 28 Jan 2025 15:50:51 -0800
Subject: [PATCH 3/7] Add fill in value for shuffle manager and auto broadcast
join threshold
Signed-off-by: Partho Sarthi
---
.../spark/rapids/tool/tuning/AutoTuner.scala | 25 ++++++++++++++++---
1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
index 6da0cf512..ff82a8dbb 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
@@ -634,7 +634,7 @@ class AutoTuner(
// set the Spark config spark.shuffle.sort.bypassMergeThreshold
getShuffleManagerClassName match {
case Right(smClassName) => appendRecommendation("spark.shuffle.manager", smClassName)
- case Left(comment) => appendComment(comment)
+ case Left(comment) => appendComment("spark.shuffle.manager", comment)
}
appendComment(autoTunerConfigsProvider.classPathComments("rapids.shuffle.jars"))
recommendFileCache()
@@ -789,13 +789,14 @@ class AutoTuner(
}
// TODO - can we set spark.sql.autoBroadcastJoinThreshold ???
+ val autoBroadcastJoinKey = "spark.sql.adaptive.autoBroadcastJoinThreshold"
val autoBroadcastJoinThresholdProperty =
- getPropertyValue("spark.sql.adaptive.autoBroadcastJoinThreshold").map(StringUtils.convertToMB)
+ getPropertyValue(autoBroadcastJoinKey).map(StringUtils.convertToMB)
if (autoBroadcastJoinThresholdProperty.isEmpty) {
- appendComment("'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.")
+ appendComment(autoBroadcastJoinKey, s"'$autoBroadcastJoinKey' was not set.")
} else if (autoBroadcastJoinThresholdProperty.get >
StringUtils.convertToMB(autoTunerConfigsProvider.AQE_AUTOBROADCAST_JOIN_THRESHOLD)) {
- appendComment("Setting 'spark.sql.adaptive.autoBroadcastJoinThreshold' > " +
+ appendComment(s"Setting '$autoBroadcastJoinKey' > " +
s"${autoTunerConfigsProvider.AQE_AUTOBROADCAST_JOIN_THRESHOLD} could " +
s"lead to performance\n" +
" regression. Should be set to a lower number.")
@@ -1014,6 +1015,22 @@ class AutoTuner(
comments += comment
}
+ /**
+   * Adds a comment for a configuration key when the AutoTuner cannot provide a recommended
+   * value, but the configuration is necessary.
+ */
+ private def appendComment(
+ key: String,
+ comment: String,
+ fillInValue: Option[String] = None): Unit = {
+ if (!skippedRecommendations.contains(key)) {
+ val recomRecord = recommendations.getOrElseUpdate(key,
+ TuningEntry.build(key, getPropertyValue(key), None))
+ recomRecord.markAsUnresolved(fillInValue)
+ comments += comment
+ }
+ }
+
def convertClusterPropsToString(): String = {
clusterProps.toString
}
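
With this overload, an entry such as spark.sql.adaptive.autoBroadcastJoinThreshold
stays in the recommendations even when no value can be derived, and it is rendered
with the placeholder (see the unit-test expectations updated in the next patch):

    --conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]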
From e5938a832ed5d9d1f5806049fad768845b39c641 Mon Sep 17 00:00:00 2001
From: Partho Sarthi
Date: Tue, 28 Jan 2025 17:13:57 -0800
Subject: [PATCH 4/7] Fix unit tests
Signed-off-by: Partho Sarthi
---
.../tool/tuning/ProfilingAutoTunerSuite.scala | 34 +++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
index a5ddab4b5..65f9f5cd9 100644
--- a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
+++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
@@ -91,6 +91,7 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark320.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
|--conf spark.sql.shuffle.partitions=200
@@ -306,6 +307,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
|--conf spark.sql.shuffle.partitions=200
@@ -369,6 +371,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
|--conf spark.sql.shuffle.partitions=200
@@ -440,6 +443,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.shuffle.partitions=200
|
@@ -503,6 +507,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.shuffle.partitions=200
|
@@ -562,6 +567,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.shuffle.partitions=200
|
|Comments:
@@ -616,6 +622,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.shuffle.partitions=200
|
@@ -675,6 +682,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.shuffle.partitions=200
|
@@ -727,6 +735,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
|--conf spark.sql.shuffle.partitions=200
@@ -790,6 +799,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.shuffle.partitions=200
|
@@ -849,6 +859,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.shuffle.partitions=200
|
@@ -896,6 +907,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
|--conf spark.sql.shuffle.partitions=200
@@ -997,6 +1009,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=4096m
|
@@ -1074,6 +1087,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=4096m
|
@@ -1144,6 +1158,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=4096m
|
@@ -1223,6 +1238,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=4096m
|
@@ -1304,6 +1320,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.files.maxPartitionBytes=3669m
|--conf spark.task.resource.gpu.amount=0.0625
|
@@ -1382,6 +1399,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.files.maxPartitionBytes=3669m
|--conf spark.task.resource.gpu.amount=0.0625
|
@@ -1455,6 +1473,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.files.maxPartitionBytes=3669m
|--conf spark.task.resource.gpu.amount=0.0625
|
@@ -1518,6 +1537,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.shuffle.partitions=200
|
@@ -1564,6 +1584,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.shuffle.partitions=200
|
@@ -1607,6 +1628,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.shuffle.partitions=200
|
@@ -1679,6 +1701,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.files.maxPartitionBytes=3669m
|--conf spark.task.resource.gpu.amount=0.0625
|
@@ -1753,6 +1776,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.files.maxPartitionBytes=3669m
|--conf spark.task.resource.gpu.amount=0.0625
|
@@ -1848,6 +1872,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=4096m
|
@@ -1956,6 +1981,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=32
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=4096m
|
@@ -2024,6 +2050,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=32
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=4096m
|
@@ -2379,6 +2406,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.files.maxPartitionBytes=3669m
|--conf spark.task.resource.gpu.amount=0.0625
|
@@ -2587,6 +2615,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.files.maxPartitionBytes=3669m
|--conf spark.sql.shuffle.partitions=400
|
@@ -2665,6 +2694,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.files.maxPartitionBytes=3669m
|--conf spark.sql.shuffle.partitions=200
|
@@ -2733,6 +2763,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
|--conf spark.sql.shuffle.partitions=200
@@ -2810,6 +2841,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
|--conf spark.sql.shuffle.partitions=200
@@ -2886,6 +2918,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
|--conf spark.sql.shuffle.partitions=200
@@ -2961,6 +2994,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark341.RapidsShuffleManager
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
|--conf spark.sql.shuffle.partitions=200
From d9ebecfaa5274fb7f208b8fe395401b5b1e27689 Mon Sep 17 00:00:00 2001
From: "Ahmed Hussein (amahussein)"
Date: Wed, 29 Jan 2025 10:30:28 -0600
Subject: [PATCH 5/7] Add defaultSpark option in definitions
Signed-off-by: Ahmed Hussein (amahussein)
---
.../main/resources/bootstrap/tuningTable.yaml | 6 +++
.../spark/rapids/tool/tuning/AutoTuner.scala | 2 +-
.../rapids/tool/tuning/TuningEntry.scala | 27 ++++++++++++-
.../tool/tuning/TuningEntryDefinition.scala | 17 +++++++-
.../rapids/tool/tuning/TuningEntryTrait.scala | 4 +-
.../tool/tuning/ProfilingAutoTunerSuite.scala | 39 +++++++++++++++++++
6 files changed, 90 insertions(+), 5 deletions(-)
diff --git a/core/src/main/resources/bootstrap/tuningTable.yaml b/core/src/main/resources/bootstrap/tuningTable.yaml
index a5c1e443f..919a9a427 100644
--- a/core/src/main/resources/bootstrap/tuningTable.yaml
+++ b/core/src/main/resources/bootstrap/tuningTable.yaml
@@ -43,6 +43,12 @@ tuningDefinitions:
enabled: true
level: cluster
category: tuning
+ - label: spark.locality.wait
+ description: 'The time to wait to launch a data-local task before giving up and launching it on a less-local node. It is recommended to set this to 0 so that tasks do not wait for data locality.'
+ enabled: true
+ level: cluster
+ category: tuning
+ defaultSpark: 3s
- label: spark.rapids.filecache.enabled
description: ''
enabled: true
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
index ff82a8dbb..6b07c9890 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
@@ -626,7 +626,7 @@ class AutoTuner(
}
appendRecommendation("spark.rapids.sql.batchSizeBytes",
autoTunerConfigsProvider.BATCH_SIZE_BYTES)
- appendRecommendation("spark.locality.wait", 0)
+ appendRecommendation("spark.locality.wait", "0")
}
def calculateJobLevelRecommendations(): Unit = {
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntry.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntry.scala
index 086cc50ea..7bf74cdb2 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntry.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntry.scala
@@ -27,10 +27,29 @@ import org.apache.spark.internal.Logging
*/
class TuningEntry(
override val name: String,
- override val originalValue: Option[String],
+ override var originalValue: Option[String],
override var tunedValue: Option[String],
definition: Option[TuningEntryDefinition] = None) extends TuningEntryTrait {
+ /**
+ * Set the original value from the Spark default if one exists.
+ * This is needed because some properties may be left unset in the application, relying on
+ * the default value defined by the Spark configuration.
+ */
+ override def setOriginalValueFromDefaultSpark(): Unit = {
+ originalValue match {
+ case Some(_) => // Do Nothing
+ case None =>
+ definition match {
+ case Some(defn) =>
+ if (defn.hasDefaultSpark()) {
+ originalValue = Some(defn.defaultSpark)
+ }
+ case None => // Do Nothing
+ }
+ }
+ }
+
override def isBootstrap(): Boolean = {
definition match {
case Some(defn) => defn.isBootstrap()
@@ -45,6 +64,12 @@ class TuningEntry(
}
globalFlag && enabled
}
+
+ /////////////////////////
+ // Initialization Code //
+ /////////////////////////
+
+ setOriginalValueFromDefaultSpark()
}
object TuningEntry extends Logging {
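For review context, a minimal sketch of the back-fill behavior introduced above (values are
hypothetical; it relies only on the constructor and the @BeanProperty setters shown in this
patch series):

    // Hypothetical: the property is absent from the eventlog (originalValue = None),
    // but its definition declares a Spark default, so construction back-fills it.
    val defn = new TuningEntryDefinition()
    defn.setLabel("spark.locality.wait")
    defn.setDefaultSpark("3s")
    val entry = new TuningEntry("spark.locality.wait", None, Some("0"), Some(defn))
    // setOriginalValueFromDefaultSpark() runs in the initialization block above,
    // so the entry now reports the Spark default as its original value:
    assert(entry.originalValue.contains("3s"))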
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryDefinition.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryDefinition.scala
index b6261ccd5..c6223e401 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryDefinition.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryDefinition.scala
@@ -38,6 +38,8 @@ import org.apache.spark.sql.rapids.tool.util.UTF8Source
* "tuning": required to tune the runtime.
* @param bootstrapEntry When true, the property should be added to the bootstrap configuration.
* Default is true.
+ * @param defaultSpark The default value of the property in Spark. This is used to set the
+ * originalValue of the property in case it is not set in the eventlog.
*/
class TuningEntryDefinition(
@BeanProperty var label: String,
@@ -45,9 +47,11 @@ class TuningEntryDefinition(
@BeanProperty var enabled: Boolean,
@BeanProperty var level: String,
@BeanProperty var category: String,
- @BeanProperty var bootstrapEntry: Boolean) {
+ @BeanProperty var bootstrapEntry: Boolean,
+ @BeanProperty var defaultSpark: String) {
def this() = {
- this("", "", enabled = true, "", "", bootstrapEntry = true)
+ this(label = "", description = "", enabled = true, level = "", category = "",
+ bootstrapEntry = true, defaultSpark = null)
}
def isEnabled(): Boolean = {
@@ -56,6 +60,15 @@ class TuningEntryDefinition(
def isBootstrap(): Boolean = {
bootstrapEntry || label.startsWith("spark.rapids.")
}
+
+ /**
+ * Indicates if the property has a default value in Spark. This implies that the default value
+ * can be used to set the original value of the property.
+ * @return true if the property has a default value in Spark.
+ */
+ def hasDefaultSpark(): Boolean = {
+ defaultSpark != null
+ }
}
class TuningEntries(
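As a side note, the @BeanProperty fields above are what make the YAML table loadable into
definitions; a rough sketch of that mapping, assuming SnakeYAML 1.x (which the bean-style
setters suggest) is on the classpath:

    import org.yaml.snakeyaml.Yaml
    import org.yaml.snakeyaml.constructor.Constructor

    val text =
      """label: spark.locality.wait
        |enabled: true
        |defaultSpark: 3s
        |""".stripMargin
    // Each mapping key is matched to a setter generated by @BeanProperty.
    val defn: TuningEntryDefinition =
      new Yaml(new Constructor(classOf[TuningEntryDefinition])).load(text)
    assert(defn.hasDefaultSpark())  // defaultSpark was populated, so this is true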
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala
index 123b70a87..7638c7497 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala
@@ -30,7 +30,7 @@ trait TuningEntryTrait {
// The value recommended by the AutoTuner
var tunedValue: Option[String]
// The original value of the property from the event log
- val originalValue: Option[String]
+ var originalValue: Option[String]
var enabled: Boolean = true
// The type of tuning operation to be performed
@@ -44,6 +44,8 @@ trait TuningEntryTrait {
"--conf %s=%s".format(name, getTuneValue())
}
+ def setOriginalValueFromDefaultSpark(): Unit
+
/**
* Indicates if the property is resolved by the AutoTuner or not. This is used to distinguish
* the properties that were not tuned due to a failure in the AutoTuner. For example,
diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
index 65f9f5cd9..1f8d9d433 100644
--- a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
+++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
@@ -80,6 +80,7 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
|--conf spark.executor.instances=2
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -143,6 +144,7 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
val expectedResults =
s"""|
|Spark Properties:
+ |--conf spark.locality.wait=0
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.files.maxPartitionBytes=512m
@@ -185,6 +187,7 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
val expectedResults =
s"""|
|Spark Properties:
+ |--conf spark.locality.wait=0
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.files.maxPartitionBytes=512m
@@ -246,6 +249,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
val expectedResults =
s"""|
|Spark Properties:
+ |--conf spark.locality.wait=0
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.files.maxPartitionBytes=512m
@@ -296,6 +300,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=8
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -360,6 +365,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=2
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -434,6 +440,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|Spark Properties:
|--conf spark.executor.instances=4
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -499,6 +506,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|Spark Properties:
|--conf spark.executor.instances=8
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -560,6 +568,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|Spark Properties:
|--conf spark.executor.instances=8
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -614,6 +623,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|Spark Properties:
|--conf spark.executor.instances=8
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -674,6 +684,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|Spark Properties:
|--conf spark.executor.instances=8
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -724,6 +735,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=2
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -791,6 +803,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|Spark Properties:
|--conf spark.executor.instances=8
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -851,6 +864,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.cores=16
|--conf spark.executor.instances=4
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -896,6 +910,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=8
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -999,6 +1014,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1077,6 +1093,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=5
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1146,6 +1163,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1226,6 +1244,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1310,6 +1329,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1389,6 +1409,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1463,6 +1484,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1529,6 +1551,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|Spark Properties:
|--conf spark.executor.instances=8
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -1576,6 +1599,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|Spark Properties:
|--conf spark.executor.instances=8
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -1620,6 +1644,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|Spark Properties:
|--conf spark.executor.instances=8
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -1690,6 +1715,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.filecache.enabled=true
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
@@ -1766,6 +1792,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1862,6 +1889,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|Spark Properties:
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1973,6 +2001,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=8
|--conf spark.executor.memory=32768m
|--conf $memoryOverheadLabel=13516m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=24
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=24
@@ -2042,6 +2071,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=8
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=13516m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=24
|--conf spark.rapids.shuffle.multiThreaded.writer.threads=24
@@ -2133,6 +2163,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
val expectedResults =
s"""|
|Spark Properties:
+ |--conf spark.locality.wait=0
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.rapids.sql.incompatibleDateFormats.enabled=true
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
@@ -2252,6 +2283,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -2396,6 +2428,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -2604,6 +2637,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -2683,6 +2717,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -2751,6 +2786,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
|--conf spark.kryo.registrator=com.nvidia.spark.rapids.GpuKryoRegistrator
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -2829,6 +2865,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
|--conf spark.kryo.registrator=org.apache.SomeRegistrator,org.apache.SomeOtherRegistrator,com.nvidia.spark.rapids.GpuKryoRegistrator
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -2906,6 +2943,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
|--conf spark.kryo.registrator=com.nvidia.spark.rapids.GpuKryoRegistrator
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -2982,6 +3020,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.executor.instances=10
|--conf spark.executor.memory=32768m
|--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
|--conf spark.rapids.memory.pinnedPool.size=4096m
|--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
|--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
From cda52a253e66192f9440f004c3dbb5a326ac138c Mon Sep 17 00:00:00 2001
From: "Ahmed Hussein (amahussein)"
Date: Thu, 30 Jan 2025 09:26:02 -0600
Subject: [PATCH 6/7] Fix typos
Signed-off-by: Ahmed Hussein (amahussein)
---
.../nvidia/spark/rapids/tool/tuning/BootstrapReport.scala | 1 +
.../nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala | 6 +++---
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/BootstrapReport.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/BootstrapReport.scala
index 9f834abba..db5527e73 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/BootstrapReport.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/BootstrapReport.scala
@@ -36,6 +36,7 @@ class BootstrapReport(tuningResult: TuningResult,
private def loadBootstrapEntries(): Seq[TuningEntryTrait] = {
tuningResult.recommendations.filter(e => e.isEnabled() && e.isBootstrap() && !e.isRemoved())
}
+
def generateReport(): Unit = {
val textFileWriter = new ToolTextFileWriter(outputDir,
s"${tuningResult.appID}-bootstrap.conf",
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala
index 7638c7497..f4ca1c9c9 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala
@@ -132,8 +132,8 @@ trait TuningEntryTrait {
def isEnabled(): Boolean
/**
- * Used to compare between two properties by converting memory units to
- * a equivalent representations.
+ * Used to compare two properties by converting memory units to equivalent
+ * representations.
* @param propValue property to be processed.
* @return the uniform representation of property.
* For Memory, the value is converted to bytes.
@@ -172,7 +172,7 @@ trait TuningEntryTrait {
}
case (None, Some(_)) => setTuningOpType(TuningOpTypes.ADD)
case (Some(orig), None) =>
- // It is possible that the property was not set bu the AutoTuner, then it means it should
+ // It is possible that the property is not set by the AutoTuner, in which case it should
// be copied from the original configuration.
setRecommendedValue(orig)
}
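For clarity, a hypothetical re-statement of the (originalValue, tunedValue) dispatch being
fixed above; the names are illustrative only, and the real code records TuningOpTypes and
compares values with unit normalization:

    def opTypeFor(original: Option[String], tuned: Option[String]): String =
      (original, tuned) match {
        case (None, Some(_))    => "ADD"       // the AutoTuner introduced the property
        case (Some(_), None)    => "CLONE"     // carry the original value into the output
        case (Some(o), Some(t)) => if (o == t) "UNCHANGED" else "UPDATE"
        case (None, None)       => "NONE"      // nothing known about the property
      }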
From 4b095d5376c33e5edb3703325981cd178a89c902 Mon Sep 17 00:00:00 2001
From: "Ahmed Hussein (amahussein)"
Date: Thu, 30 Jan 2025 15:25:01 -0600
Subject: [PATCH 7/7] Populate descriptions and add default Spark values
Signed-off-by: Ahmed Hussein (amahussein)
---
.../main/resources/bootstrap/tuningTable.yaml | 68 ++++++++++-------
.../spark/rapids/tool/tuning/AutoTuner.scala | 1 +
.../tool/tuning/ProfilingAutoTunerSuite.scala | 76 -------------------
.../tuning/QualificationAutoTunerSuite.scala | 14 ++--
4 files changed, 50 insertions(+), 109 deletions(-)
diff --git a/core/src/main/resources/bootstrap/tuningTable.yaml b/core/src/main/resources/bootstrap/tuningTable.yaml
index 919a9a427..a40c79611 100644
--- a/core/src/main/resources/bootstrap/tuningTable.yaml
+++ b/core/src/main/resources/bootstrap/tuningTable.yaml
@@ -13,33 +13,38 @@
# limitations under the License.
tuningDefinitions:
+ - label: spark.databricks.adaptive.autoOptimizeShuffle.enabled
+ description: 'Databricks auto-optimized shuffle. It is recommended to turn it off so that (spark.sql.shuffle.partitions) can be set manually.'
+ enabled: true
+ level: job
+ category: tuning
- label: spark.executor.cores
- description: ''
+ description: 'The number of cores to use on each executor. It is recommended to be set to 16.'
enabled: true
level: cluster
category: tuning
- label: spark.executor.instances
- description: ''
+ description: 'Controls the parallelism level. It is recommended to be set to (cpuCoresPerNode * numWorkers) / spark.executor.cores.'
enabled: true
level: cluster
category: tuning
- label: spark.executor.memory
- description: ''
+ description: 'Amount of memory to use per executor process. This is tuned based on the available CPU memory on the worker node.'
enabled: true
level: cluster
category: tuning
- label: spark.executor.memoryOverhead
- description: ''
+ description: 'Amount of additional memory to be allocated per executor process, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the executor size.'
enabled: true
level: cluster
category: tuning
- label: spark.executor.memoryOverheadFactor
- description: ''
+ description: 'Fraction of executor memory to be allocated as additional non-heap memory per executor process. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the container size.'
enabled: true
level: cluster
category: tuning
- label: spark.kubernetes.memoryOverheadFactor
- description: ''
+ description: 'Specific to K8s. Fraction of executor memory to be allocated as additional non-heap memory per executor process.'
enabled: true
level: cluster
category: tuning
@@ -48,22 +53,24 @@ tuningDefinitions:
enabled: true
level: cluster
category: tuning
- defaultSpark: 3s
+ defaultSpark: "3s"
- label: spark.rapids.filecache.enabled
- description: ''
+ description: 'Enables RAPIDS file cache. The file cache stores data locally in the same local directories that have been configured for the Spark executor.'
enabled: true
level: job
+ category: tuning
- label: spark.rapids.memory.pinnedPool.size
- description: ''
+ description: 'The size of the pinned memory pool in bytes unless otherwise specified. Use 0 to disable the pool.'
enabled: true
level: cluster
+ category: tuning
- label: spark.rapids.shuffle.multiThreaded.maxBytesInFlight
- description: ''
+ description: 'This property controls the number of bytes we allow in flight per Spark task. This typically matters on the reader side: when blocks are received from the network, they are queued onto these threads for decompression and decode.'
enabled: true
level: cluster
category: tuning
- label: spark.rapids.shuffle.multiThreaded.reader.threads
- description: ''
+ description: 'The shuffle reader is a single implementation irrespective of the number of partitions. Set the value to zero to turn off the multi-threaded reader entirely.'
enabled: true
level: cluster
category: tuning
@@ -73,86 +80,91 @@ tuningDefinitions:
level: cluster
category: tuning
- label: spark.rapids.sql.batchSizeBytes
- description: ''
+ description: 'Set the target number of bytes for a GPU batch. Split sizes for input data are covered by separate configs.'
enabled: true
level: job
category: tuning
- label: spark.rapids.sql.concurrentGpuTasks
- description: ''
+ description: 'Set the number of tasks that can execute concurrently per GPU. Tasks may temporarily block when the number of concurrent tasks in the executor exceeds this amount. Allowing too many concurrent tasks on the same GPU may lead to GPU out-of-memory errors.'
enabled: true
level: cluster
category: tuning
- label: spark.rapids.sql.format.parquet.multithreaded.combine.waitTime
- description: ''
+ description: 'When using the multithreaded parquet reader with combine mode, how long to wait, in milliseconds, for more files to finish if the size threshold has not been met. Note that this will wait this amount of time from when the last file was available, so the total wait time could be larger than this. DEPRECATED: use spark.rapids.sql.reader.multithreaded.combine.waitTime instead.'
enabled: true
level: cluster
category: tuning
- label: spark.rapids.sql.enabled
- description: 'should be true to enable SQL operations on the GPU.'
+ description: 'Should be true to enable SQL operations on the GPU.'
enabled: true
level: cluster
category: functionality
- label: spark.rapids.sql.multiThreadedRead.numThreads
- description: ''
+ description: 'The maximum number of threads on each executor to use for reading small files in parallel.'
enabled: true
level: cluster
category: tuning
- label: spark.rapids.sql.reader.multithreaded.combine.sizeBytes
- description: ''
+ description: 'The target size in bytes to combine multiple small files together when using the MULTITHREADED parquet or orc reader. With combine disabled, the MULTITHREADED reader reads the files in parallel and sends individual files down to the GPU, but that can be inefficient for small files.'
enabled: true
level: cluster
category: tuning
- label: spark.shuffle.manager
- description: ''
+ description: 'The RAPIDS Shuffle Manager is an implementation of the ShuffleManager interface in Apache Spark that allows custom mechanisms to exchange shuffle data. We currently expose two modes of operation: Multi Threaded and UCX.'
enabled: true
level: cluster
+ category: tuning
- label: spark.sql.adaptive.enabled
- description: ''
+ description: 'When true, enable adaptive query execution, which re-optimizes the query plan in the middle of query execution, based on accurate runtime statistics.'
enabled: true
level: job
category: tuning
+ defaultSpark: "true"
- label: spark.sql.adaptive.advisoryPartitionSizeInBytes
- description: ''
+ description: 'The advisory size in bytes of the shuffle partition during adaptive optimization (when spark.sql.adaptive.enabled is true). It takes effect when Spark coalesces small shuffle partitions or splits skewed shuffle partitions.'
enabled: true
level: job
category: tuning
- label: spark.sql.adaptive.coalescePartitions.initialPartitionNum
- description: ''
+ description: 'The initial number of shuffle partitions before coalescing. If not set, it equals spark.sql.shuffle.partitions.'
enabled: true
level: job
category: tuning
- label: spark.sql.adaptive.coalescePartitions.minPartitionNum
- description: ''
+ description: '(deprecated) The suggested (not guaranteed) minimum number of shuffle partitions after coalescing. If not set, the default value is the default parallelism of the Spark cluster.'
enabled: true
level: job
category: tuning
- label: spark.sql.adaptive.coalescePartitions.minPartitionSize
- description: ''
+ description: 'The minimum size of shuffle partitions after coalescing. This is useful when the adaptively calculated target size is too small during partition coalescing.'
enabled: true
level: job
category: tuning
+ defaultSpark: "1m"
- label: spark.sql.adaptive.coalescePartitions.parallelismFirst
- description: ''
+ description: 'When true, Spark does not respect the target size specified by (spark.sql.adaptive.advisoryPartitionSizeInBytes) (default 64MB) when coalescing contiguous shuffle partitions, but adaptively calculates the target size according to the default parallelism of the Spark cluster.'
enabled: true
level: job
category: tuning
+ defaultSpark: "true"
- label: spark.sql.adaptive.autoBroadcastJoinThreshold
- description: ''
+ description: 'Configures the maximum size in bytes for a table that will be broadcast to all worker nodes when performing a join. By setting this value to -1, broadcasting can be disabled.'
enabled: true
level: job
category: tuning
- label: spark.sql.files.maxPartitionBytes
- description: ''
+ description: 'The maximum number of bytes to pack into a single partition when reading files. This configuration is effective only when using file-based sources such as Parquet, JSON and ORC.'
enabled: true
level: job
category: tuning
- label: spark.sql.shuffle.partitions
- description: ''
+ description: 'The default number of partitions to use when shuffling data for joins or aggregations. Note: For structured streaming, this configuration cannot be changed between query restarts from the same checkpoint location.'
enabled: true
level: job
category: tuning
+ defaultSpark: "200"
- label: spark.task.resource.gpu.amount
- description: ''
+ description: 'The GPU resource amount per task when Apache Spark schedules GPU resources. For example, setting the value to 1 means that only one task will run concurrently per executor.'
enabled: true
level: cluster
category: tuning
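The defaultSpark values above also explain the test-expectation changes below: once a missing
property is back-filled with Spark's default, recommending that same default (e.g.
spark.sql.shuffle.partitions=200) compares as unchanged and is dropped from the emitted
--conf list. A hypothetical sketch of the effect:

    // Hypothetical: spark.sql.shuffle.partitions is absent from the eventlog, but
    // its definition declares defaultSpark = "200", so the original value becomes
    // "200"; a recommendation of "200" is then a no-op and is omitted.
    def isNoOpRecommendation(backfilledOriginal: Option[String],
        tuned: Option[String]): Boolean =
      backfilledOriginal.isDefined && backfilledOriginal == tuned

    assert(isNoOpRecommendation(Some("200"), Some("200")))  // omitted from report
    assert(!isNoOpRecommendation(None, Some("200")))        // still reported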
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
index 74c9f1b5f..7047060c7 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
@@ -719,6 +719,7 @@ class AutoTuner(
}
private def recommendAQEProperties(): Unit = {
+ // Note: 'spark.sql.adaptive.enabled' defaults to true as of Spark 3.2.0
val aqeEnabled = getPropertyValue("spark.sql.adaptive.enabled")
.getOrElse("false").toLowerCase
if (aqeEnabled == "false") {
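Since the tuning table now declares defaultSpark: "true" for spark.sql.adaptive.enabled, a
hedged, illustrative sketch (not part of this patch) of how such a check could consult the
declared default instead of a hard-coded "false":

    // Illustrative only: fall back to the declared Spark default ("true" for
    // spark.sql.adaptive.enabled on Spark 3.2.0+) rather than assuming "false".
    def aqeEnabled(getProp: String => Option[String], sparkDefault: String): Boolean =
      getProp("spark.sql.adaptive.enabled").getOrElse(sparkDefault).toBoolean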
diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
index 68dc72064..f1cd67f9c 100644
--- a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
+++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
@@ -95,7 +95,6 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|--conf spark.task.resource.gpu.amount=0.001
|
|Comments:
@@ -115,10 +114,8 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- 'spark.task.resource.gpu.amount' was not set.
|- Number of workers is missing. Setting default to 1.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -141,7 +138,6 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.executor.cores' should be set to 16.
@@ -154,7 +150,6 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- 'spark.task.resource.gpu.amount' should be set to 0.001.
|- Could not infer the cluster configuration, recommendations are generated using default values!
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -178,7 +173,6 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.executor.cores' should be set to 16.
@@ -191,7 +185,6 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- 'spark.task.resource.gpu.amount' should be set to 0.001.
|- Could not infer the cluster configuration, recommendations are generated using default values!
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -234,7 +227,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.batchSizeBytes=2147483647
|--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.executor.cores' should be set to 16.
@@ -247,7 +239,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- 'spark.task.resource.gpu.amount' should be set to 0.001.
|- Could not infer the cluster configuration, recommendations are generated using default values!
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -290,7 +281,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|--conf spark.task.resource.gpu.amount=0.001
|
|Comments:
@@ -310,10 +300,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- 'spark.task.resource.gpu.amount' was not set.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -355,7 +343,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|--conf spark.task.resource.gpu.amount=0.001
|
|Comments:
@@ -375,10 +362,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- 'spark.task.resource.gpu.amount' was not set.
|- Number of workers is missing. Setting default to 1.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -427,7 +412,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -440,8 +424,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- GPU count is missing. Setting default to 1.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -492,7 +474,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -501,8 +482,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- GPU memory is missing. Setting default to 15109m.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -552,7 +531,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.rapids.sql.multiThreadedRead.numThreads=80
|--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -560,7 +538,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- GPU memory is missing. Setting default to 15109m.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -609,7 +586,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -618,8 +594,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- GPU device is missing. Setting default to $T4Gpu.
|- GPU memory is missing. Setting default to ${T4Gpu.getMemory}.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -670,7 +644,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -679,8 +652,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- GPU device is missing. Setting default to $T4Gpu.
|- GPU memory is missing. Setting default to ${T4Gpu.getMemory}.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -725,7 +696,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|--conf spark.task.resource.gpu.amount=0.001
|
|Comments:
@@ -745,10 +715,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- 'spark.task.resource.gpu.amount' was not set.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -789,7 +757,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -798,8 +765,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
|""".stripMargin
@@ -850,7 +815,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -859,8 +823,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
|""".stripMargin
@@ -900,7 +862,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|--conf spark.task.resource.gpu.amount=0.001
|
|Comments:
@@ -920,10 +881,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- 'spark.task.resource.gpu.amount' was not set.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -1012,7 +971,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -1091,7 +1049,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -1168,7 +1125,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -1248,7 +1204,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -1537,7 +1492,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -1546,8 +1500,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.multiple")} [23.06.0, 23.02.1]
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
|""".stripMargin
@@ -1585,7 +1537,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -1594,8 +1545,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- A newer RAPIDS Accelerator for Apache Spark plugin is available:
| $pluginJarMvnURl
| Version used in application is $jarVer.
@@ -1630,7 +1579,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -1639,8 +1587,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
|""".stripMargin
val rapidsJarsArr = Seq(s"rapids-4-spark_2.12-$latestRelease.jar")
@@ -1886,7 +1832,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -1999,7 +1944,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -2068,7 +2012,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -2160,7 +2103,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.executor.instances' was not set.
@@ -2174,10 +2116,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
|- ${ProfilingAutoTunerConfigsProvider.commentForExperimentalConfig("spark.rapids.sql.incompatibleDateFormats.enabled")}
@@ -2308,7 +2248,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- Setting 'spark.sql.adaptive.autoBroadcastJoinThreshold' > 100m could lead to performance\n regression. Should be set to a lower number.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -2665,7 +2604,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.shuffle.partitions' should be increased since spilling occurred in shuffle stages.
- |- 'spark.sql.shuffle.partitions' was not set.
|- Average JVM GC time is very high. Other Garbage Collectors can be used for better performance.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -2731,7 +2669,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.files.maxPartitionBytes=3669m
- |--conf spark.sql.shuffle.partitions=200
|
|Comments:
|- 'spark.executor.memoryOverhead' was not set.
@@ -2744,7 +2681,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
- |- 'spark.sql.shuffle.partitions' was not set.
|- Average JVM GC time is very high. Other Garbage Collectors can be used for better performance.
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
|- Shuffle skew exists (when task's Shuffle Read Size > 3 * Avg Stage-level size) in
@@ -2802,7 +2738,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|--conf spark.task.resource.gpu.amount=0.001
|
|Comments:
@@ -2821,10 +2756,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- 'spark.task.resource.gpu.amount' was not set.
|- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -2881,7 +2814,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|--conf spark.task.resource.gpu.amount=0.001
|
|Comments:
@@ -2899,10 +2831,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- 'spark.task.resource.gpu.amount' was not set.
|- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -2959,7 +2889,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|--conf spark.task.resource.gpu.amount=0.001
|
|Comments:
@@ -2977,10 +2906,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- 'spark.task.resource.gpu.amount' was not set.
|- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -3036,7 +2963,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
|--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
|--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
|--conf spark.task.resource.gpu.amount=0.001
|
|Comments:
@@ -3054,10 +2980,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
|- 'spark.shuffle.manager' was not set.
|- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
|- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
|- 'spark.sql.adaptive.enabled' should be enabled for better performance.
|- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
|- 'spark.task.resource.gpu.amount' was not set.
|- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
|- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
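
Aside for reviewers: every expected block edited in the hunks above is built with Scala's '|'-margin idiom, a triple-quoted string closed by `.stripMargin`, and the suite asserts containment against the AutoTuner's rendered output. A minimal, self-contained sketch of that idiom follows; the object name and the sample strings are illustrative only, not part of the suite.

```scala
// Illustrative only: demonstrates the '|' + stripMargin expectation idiom
// used throughout the hunks above. ExpectedBlockDemo is not in the suite.
object ExpectedBlockDemo {
  def main(args: Array[String]): Unit = {
    // stripMargin removes leading whitespace up to and including '|',
    // yielding the exact multi-line text to search for in the output.
    val expected =
      """|Comments:
         |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
         |""".stripMargin
    val autoTunerOutput =
      "Comments:\n- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.\n"
    assert(autoTunerOutput.contains(expected))
    println("expectation idiom matches as described")
  }
}
```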
diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerSuite.scala
index c6891d636..a65ddd064 100644
--- a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerSuite.scala
+++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerSuite.scala
@@ -76,14 +76,18 @@ class QualificationAutoTunerSuite extends BaseAutoTunerSuite {
assert(expectedResults.forall(autoTunerOutput.contains))
}
- test("test AutoTuner for Qualification sets shuffle partitions to 200") {
- val autoTuner = buildDefaultAutoTuner()
+ test("test AutoTuner for Qualification should not change shuffle partitions") {
+    // Set shuffle partitions explicitly to 100. 'spark.sql.shuffle.partitions' is
+    // currently one of the limitedLogicRecommendations, so the AutoTuner keeps the
+    // existing value and, because it is unchanged, emits no recommendation for it.
+ val autoTuner = buildDefaultAutoTuner(
+ defaultSparkProps ++ mutable.Map("spark.sql.shuffle.partitions" -> "100")
+ )
val (properties, comments) = autoTuner.getRecommendedProperties()
val autoTunerOutput = Profiler.getAutoTunerResultsAsString(properties, comments)
-    val expectedResults = Seq(
-      "--conf spark.sql.shuffle.partitions=200",
-      "- 'spark.sql.shuffle.partitions' was not set."
+    val unexpectedResults = Seq(
+      "--conf spark.sql.shuffle.partitions"
)
- assert(expectedResults.forall(autoTunerOutput.contains))
+    assert(unexpectedResults.forall(t => !autoTunerOutput.contains(t)))
}
}
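
The rewritten test's comment captures the rule its assertions depend on: a limited-logic property keeps the value the user already set, and an unchanged value produces neither a `--conf` line nor a "was not set" comment. Below is a minimal sketch of that skip-unchanged rule under assumed names (`SkipUnchangedDemo`, `emit`, and the map-based inputs are hypothetical, not the AutoTuner's API).

```scala
// Illustrative sketch only: models the rule the test above exercises, i.e. a
// property whose recommended value equals the value already present in the
// source configuration is dropped from the reported recommendations.
object SkipUnchangedDemo {
  // Hypothetical helper: recommendations are (property, value) pairs.
  def emit(sourceProps: Map[String, String],
           recommended: Map[String, String]): Seq[String] = {
    recommended.toSeq.sortBy(_._1).collect {
      // Only report a property when it changes the effective configuration.
      case (prop, value) if !sourceProps.get(prop).contains(value) =>
        s"--conf $prop=$value"
    }
  }

  def main(args: Array[String]): Unit = {
    val sourceProps = Map("spark.sql.shuffle.partitions" -> "100")
    val recommended = Map(
      "spark.sql.shuffle.partitions" -> "100",      // unchanged -> dropped
      "spark.sql.files.maxPartitionBytes" -> "512m" // new -> reported
    )
    emit(sourceProps, recommended).foreach(println)
    // Prints only: --conf spark.sql.files.maxPartitionBytes=512m
  }
}
```

Under this rule, asserting the absence of any "--conf spark.sql.shuffle.partitions" substring, as the rewritten test does, is sufficient to show the user's explicit value of 100 was left untouched.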