diff --git a/core/src/main/resources/bootstrap/tuningTable.yaml b/core/src/main/resources/bootstrap/tuningTable.yaml
new file mode 100644
index 000000000..a40c79611
--- /dev/null
+++ b/core/src/main/resources/bootstrap/tuningTable.yaml
@@ -0,0 +1,170 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+tuningDefinitions:
+  - label: spark.databricks.adaptive.autoOptimizeShuffle.enabled
+    description: 'Auto-optimized shuffle on Databricks. It is recommended to turn it off so that (spark.sql.shuffle.partitions) can be set manually.'
+    enabled: true
+    level: job
+    category: tuning
+  - label: spark.executor.cores
+    description: 'The number of cores to use on each executor. It is recommended to be set to 16.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.executor.instances
+    description: 'Controls the parallelism level. It is recommended to be set to (cpuCoresPerNode * numWorkers) / spark.executor.cores.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.executor.memory
+    description: 'Amount of memory to use per executor process. This is tuned based on the available CPU memory on the worker node.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.executor.memoryOverhead
+    description: 'Amount of additional memory to be allocated per executor process, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the executor size.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.executor.memoryOverheadFactor
+    description: 'Fraction of executor memory to be allocated as additional non-heap memory per executor process. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the container size.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.kubernetes.memoryOverheadFactor
+    description: 'Specific to K8s. Fraction of executor memory to be allocated as additional non-heap memory per executor process.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.locality.wait
+    description: 'The time to wait to launch a data-local task before giving up and launching it on a less-local node. It is recommended to set this to 0 to avoid waiting for a data-local task.'
+    enabled: true
+    level: cluster
+    category: tuning
+    defaultSpark: "3s"
+  - label: spark.rapids.filecache.enabled
+    description: 'Enables the RAPIDS file cache. The file cache stores data locally in the same local directories that have been configured for the Spark executor.'
+    enabled: true
+    level: job
+    category: tuning
+  - label: spark.rapids.memory.pinnedPool.size
+    description: 'The size of the pinned memory pool in bytes unless otherwise specified. Use 0 to disable the pool.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.rapids.shuffle.multiThreaded.maxBytesInFlight
+    description: 'This property controls the number of bytes allowed in flight per Spark task. This typically applies on the reader side: when blocks are received from the network, they are queued onto these threads for decompression and decoding.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.rapids.shuffle.multiThreaded.reader.threads
+    description: 'The shuffle reader is a single implementation irrespective of the number of partitions. Set the value to zero to turn off the multi-threaded reader entirely.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.rapids.shuffle.multiThreaded.writer.threads
+    description: ''
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.rapids.sql.batchSizeBytes
+    description: 'Set the target number of bytes for a GPU batch. Split sizes for input data are covered by separate configs.'
+    enabled: true
+    level: job
+    category: tuning
+  - label: spark.rapids.sql.concurrentGpuTasks
+    description: 'Set the number of tasks that can execute concurrently per GPU. Tasks may temporarily block when the number of concurrent tasks in the executor exceeds this amount. Allowing too many concurrent tasks on the same GPU may lead to GPU out of memory errors.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.rapids.sql.format.parquet.multithreaded.combine.waitTime
+    description: 'When using the multithreaded parquet reader with combine mode, how long to wait, in milliseconds, for more files to finish if the size threshold has not been met. Note that this wait is measured from when the last file became available, so the total wait time could be larger than this. DEPRECATED: use spark.rapids.sql.reader.multithreaded.combine.waitTime instead.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.rapids.sql.enabled
+    description: 'Should be true to enable SQL operations on the GPU.'
+    enabled: true
+    level: cluster
+    category: functionality
+  - label: spark.rapids.sql.multiThreadedRead.numThreads
+    description: 'The maximum number of threads on each executor to use for reading small files in parallel.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.rapids.sql.reader.multithreaded.combine.sizeBytes
+    description: 'The target size in bytes to combine multiple small files together when using the MULTITHREADED parquet or orc reader. With combine disabled, the MULTITHREADED reader reads the files in parallel and sends individual files down to the GPU, but that can be inefficient for small files.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.shuffle.manager
+    description: 'The RAPIDS Shuffle Manager is an implementation of the ShuffleManager interface in Apache Spark that allows custom mechanisms to exchange shuffle data. We currently expose two modes of operation: Multi Threaded and UCX.'
+    enabled: true
+    level: cluster
+    category: tuning
+  - label: spark.sql.adaptive.enabled
+    description: 'When true, enable adaptive query execution, which re-optimizes the query plan in the middle of query execution, based on accurate runtime statistics.'
+    enabled: true
+    level: job
+    category: tuning
+    defaultSpark: "true"
+  - label: spark.sql.adaptive.advisoryPartitionSizeInBytes
+    description: 'The advisory size in bytes of the shuffle partition during adaptive optimization (when spark.sql.adaptive.enabled is true).
+      It takes effect when Spark coalesces small shuffle partitions or splits skewed shuffle partitions.'
+    enabled: true
+    level: job
+    category: tuning
+  - label: spark.sql.adaptive.coalescePartitions.initialPartitionNum
+    description: 'The initial number of shuffle partitions before coalescing. If not set, it equals spark.sql.shuffle.partitions.'
+    enabled: true
+    level: job
+    category: tuning
+  - label: spark.sql.adaptive.coalescePartitions.minPartitionNum
+    description: '(deprecated) The suggested (not guaranteed) minimum number of shuffle partitions after coalescing. If not set, the default value is the default parallelism of the Spark cluster.'
+    enabled: true
+    level: job
+    category: tuning
+  - label: spark.sql.adaptive.coalescePartitions.minPartitionSize
+    description: 'The minimum size of shuffle partitions after coalescing. This is useful when the adaptively calculated target size is too small during partition coalescing.'
+    enabled: true
+    level: job
+    category: tuning
+    defaultSpark: "1m"
+  - label: spark.sql.adaptive.coalescePartitions.parallelismFirst
+    description: 'When true, Spark does not respect the target size specified by (spark.sql.adaptive.advisoryPartitionSizeInBytes) (default 64MB) when coalescing contiguous shuffle partitions, but adaptively calculates the target size according to the default parallelism of the Spark cluster.'
+    enabled: true
+    level: job
+    category: tuning
+    defaultSpark: "true"
+  - label: spark.sql.adaptive.autoBroadcastJoinThreshold
+    description: 'Configures the maximum size in bytes for a table that will be broadcast to all worker nodes when performing a join. By setting this value to -1, broadcasting can be disabled.'
+    enabled: true
+    level: job
+    category: tuning
+  - label: spark.sql.files.maxPartitionBytes
+    description: 'The maximum number of bytes to pack into a single partition when reading files. This configuration is effective only when using file-based sources such as Parquet, JSON and ORC.'
+    enabled: true
+    level: job
+    category: tuning
+  - label: spark.sql.shuffle.partitions
+    description: 'The default number of partitions to use when shuffling data for joins or aggregations. Note: For structured streaming, this configuration cannot be changed between query restarts from the same checkpoint location.'
+    enabled: true
+    level: job
+    category: tuning
+    defaultSpark: "200"
+  - label: spark.task.resource.gpu.amount
+    description: 'The GPU resource amount per task when Apache Spark schedules GPU resources. For example, setting the value to 1 means that only one task will run concurrently per executor.'
+    enabled: true
+    level: cluster
+    category: tuning
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
index 2a7069296..620509804 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
@@ -23,7 +23,7 @@ import scala.collection.mutable.{ArrayBuffer, HashMap}
 import scala.util.control.NonFatal
 
 import com.nvidia.spark.rapids.tool.{AppSummaryInfoBaseProvider, EventLogInfo, EventLogPathProcessor, FailedEventLog, Platform, PlatformFactory, ToolBase}
-import com.nvidia.spark.rapids.tool.tuning.{AutoTuner, ProfilingAutoTunerConfigsProvider}
+import com.nvidia.spark.rapids.tool.tuning.{AutoTuner, ProfilingAutoTunerConfigsProvider, TuningEntryTrait}
 import com.nvidia.spark.rapids.tool.views._
 
 import org.apache.hadoop.conf.Configuration
@@ -416,7 +416,7 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolea
    */
  private def runAutoTuner(appInfo: Option[ApplicationSummaryInfo],
      driverInfoProvider: DriverLogInfoProvider = BaseDriverLogInfoProvider.noneDriverLog)
-    : (Seq[RecommendedPropertyResult], Seq[RecommendedCommentResult]) = {
+    : (Seq[TuningEntryTrait], Seq[RecommendedCommentResult]) = {
    // only run the auto tuner on GPU event logs for profiling tool right now. There are
    // assumptions made in the code
    if (appInfo.isDefined && appInfo.get.appInfo.head.pluginEnabled) {
@@ -611,10 +611,10 @@ object Profiler {
  val COMBINED_LOG_FILE_NAME_PREFIX = "rapids_4_spark_tools_combined"
  val SUBDIR = "rapids_4_spark_profile"
 
-  def getAutoTunerResultsAsString(props: Seq[RecommendedPropertyResult],
+  def getAutoTunerResultsAsString(props: Seq[TuningEntryTrait],
      comments: Seq[RecommendedCommentResult]): String = {
    val propStr = if (props.nonEmpty) {
-        val propertiesToStr = props.map(_.toString).reduce(_ + "\n" + _)
+        val propertiesToStr = props.map(_.toConfString).reduce(_ + "\n" + _)
        s"\nSpark Properties:\n$propertiesToStr\n"
      } else {
        "Cannot recommend properties. See Comments.\n"
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
index a1193acb5..7047060c7 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala
@@ -194,74 +194,6 @@ class ClusterProperties(
     s"{${system.toString}, ${gpu.toString}, $softwareProperties}"
 }
 
-/**
- * Wrapper to hold the recommendation of a given criterion.
- *
- * @param name the property label.
- * @param original the value loaded from the spark properties.
- * @param recommended the recommended value by the AutoTuner.
- */
-class RecommendationEntry(val name: String,
-    val original: Option[String],
-    var recommended: Option[String]) {
-
-  def setRecommendedValue(value: String): Unit = {
-    recommended = Option(value)
-  }
-
-  /**
-   * Used to compare between two properties by converting memory units to
-   * a equivalent representations.
-   * @param propValue property to be processed.
-   * @return the uniform representation of property.
-   *         For Memory, the value is converted to bytes.
-   */
-  private def getRawValue(propValue: Option[String]): Option[String] = {
-    propValue match {
-      case None => None
-      case Some(value) =>
-        if (StringUtils.isMemorySize(value)) {
-          // if it is memory return the bytes unit
-          Some(s"${StringUtils.convertMemorySizeToBytes(value)}")
-        } else {
-          propValue
-        }
-    }
-  }
-
-  /**
-   * Returns true when the recommendation is different than the original.
-   */
-  private def recommendsNewValue(): Boolean = {
-    val originalVal = getRawValue(original)
-    val recommendedVal = getRawValue(recommended)
-    (originalVal, recommendedVal) match {
-      case (None, None) => false
-      case (Some(orig), Some(rec)) =>
-        orig != rec
-      case _ => true
-    }
-  }
-
-  /**
-   * True or False whether the recommendation is valid. e.g., recommendations that does not change
-   * the original value returns false if filter is enabled.
-   * @param filterByUpdated flag to pick only the properties that would be updated by the
-   *                        recommendations
-   */
-  def isValid(filterByUpdated: Boolean): Boolean = {
-    recommended match {
-      case None => false
-      case _ =>
-        if (filterByUpdated) { // filter enabled
-          recommendsNewValue()
-        } else {
-          true
-        }
-    }
-  }
-}
-
 /**
  * Represents different Spark master types.
  */
@@ -358,8 +290,8 @@ class AutoTuner(
   extends Logging {
 
   var comments = new mutable.ListBuffer[String]()
-  var recommendations: mutable.LinkedHashMap[String, RecommendationEntry] =
-    mutable.LinkedHashMap[String, RecommendationEntry]()
+  var recommendations: mutable.LinkedHashMap[String, TuningEntryTrait] =
+    mutable.LinkedHashMap[String, TuningEntryTrait]()
   // list of recommendations to be skipped for recommendations
   // Note that the recommendations will be computed anyway to avoid breaking dependencies.
   private val skippedRecommendations: mutable.HashSet[String] = mutable.HashSet[String]()
@@ -392,7 +324,7 @@ class AutoTuner(
     autoTunerConfigsProvider.recommendationsTarget.foreach { key =>
       // no need to add new records if they are missing from props
       getPropertyValue(key).foreach { propVal =>
-        val recommendationVal = new RecommendationEntry(key, Option(propVal), None)
+        val recommendationVal = TuningEntry.build(key, Option(propVal), None)
         recommendations(key) = recommendationVal
       }
     }
@@ -401,10 +333,10 @@ class AutoTuner(
   def appendRecommendation(key: String, value: String): Unit = {
     if (!skippedRecommendations.contains(key)) {
       val recomRecord = recommendations.getOrElseUpdate(key,
-        new RecommendationEntry(key, getPropertyValue(key), None))
+        TuningEntry.build(key, getPropertyValue(key), None))
       if (value != null) {
         recomRecord.setRecommendedValue(value)
-        if (recomRecord.original.isEmpty) {
+        if (recomRecord.originalValue.isEmpty) {
           // add a comment that the value was missing in the cluster properties
           appendComment(s"'$key' was not set.")
         }
@@ -688,7 +620,7 @@ class AutoTuner(
     }
     appendRecommendation("spark.rapids.sql.batchSizeBytes",
       autoTunerConfigsProvider.BATCH_SIZE_BYTES)
-    appendRecommendation("spark.locality.wait", 0)
+    appendRecommendation("spark.locality.wait", "0")
   }
 
   def calculateJobLevelRecommendations(): Unit = {
@@ -696,7 +628,7 @@ class AutoTuner(
     // set the Spark config spark.shuffle.sort.bypassMergeThreshold
     getShuffleManagerClassName match {
       case Right(smClassName) => appendRecommendation("spark.shuffle.manager", smClassName)
-      case Left(comment) => appendComment(comment)
+      case Left(comment) => appendComment("spark.shuffle.manager", comment)
     }
     appendComment(autoTunerConfigsProvider.classPathComments("rapids.shuffle.jars"))
     recommendFileCache()
@@ -787,6 +719,7 @@ class AutoTuner(
   }
 
   private def recommendAQEProperties(): Unit = {
+    // Note: AQE ('spark.sql.adaptive.enabled') is enabled by default since Spark 3.2.0
     val aqeEnabled = getPropertyValue("spark.sql.adaptive.enabled")
       .getOrElse("false").toLowerCase
     if (aqeEnabled == "false") {
@@ -851,13 +784,14 @@ class AutoTuner(
     }
 
     // TODO - can we set spark.sql.autoBroadcastJoinThreshold ???
+    val autoBroadcastJoinKey = "spark.sql.adaptive.autoBroadcastJoinThreshold"
     val autoBroadcastJoinThresholdProperty =
-      getPropertyValue("spark.sql.adaptive.autoBroadcastJoinThreshold").map(StringUtils.convertToMB)
+      getPropertyValue(autoBroadcastJoinKey).map(StringUtils.convertToMB)
     if (autoBroadcastJoinThresholdProperty.isEmpty) {
-      appendComment("'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.")
+      appendComment(autoBroadcastJoinKey, s"'$autoBroadcastJoinKey' was not set.")
     } else if (autoBroadcastJoinThresholdProperty.get >
         StringUtils.convertToMB(autoTunerConfigsProvider.AQE_AUTOBROADCAST_JOIN_THRESHOLD)) {
-      appendComment("Setting 'spark.sql.adaptive.autoBroadcastJoinThreshold' > " +
+      appendComment(s"Setting '$autoBroadcastJoinKey' > " +
        s"${autoTunerConfigsProvider.AQE_AUTOBROADCAST_JOIN_THRESHOLD} could " +
        s"lead to performance\n" +
        "  regression. Should be set to a lower number.")
@@ -1057,13 +991,8 @@ class AutoTuner(
       case Some(f) => f.contains("com.nvidia.spark.SQLPlugin")
       case None => false
     }
-    val rapidsEnabled = getPropertyValue("spark.rapids.sql.enabled") match {
-      case Some(f) => f.toBoolean
-      case None => true
-    }
-    if (!rapidsEnabled) {
-      appendRecommendation("spark.rapids.sql.enabled", "true")
-    }
+    // Always recommend enabling the plugin, without checking whether it is already set.
+    appendRecommendation("spark.rapids.sql.enabled", "true")
     if (!isPluginLoaded) {
       appendComment("RAPIDS Accelerator for Apache Spark jar is missing in \"spark.plugins\". " +
           "Please refer to " +
@@ -1081,6 +1010,22 @@ class AutoTuner(
     comments += comment
   }
 
+  /**
+   * Adds a comment for a configuration key when the AutoTuner cannot provide a recommended
+   * value, but the configuration is necessary.
+   */
+  private def appendComment(
+      key: String,
+      comment: String,
+      fillInValue: Option[String] = None): Unit = {
+    if (!skippedRecommendations.contains(key)) {
+      val recomRecord = recommendations.getOrElseUpdate(key,
+        TuningEntry.build(key, getPropertyValue(key), None))
+      recomRecord.markAsUnresolved(fillInValue)
+      comments += comment
+    }
+  }
+
   def convertClusterPropsToString(): String = {
     clusterProps.toString
   }
@@ -1113,18 +1058,23 @@ class AutoTuner(
     comments.map(RecommendedCommentResult).sortBy(_.comment)
   }
 
-  private def toRecommendationsProfileResult: Seq[RecommendedPropertyResult] = {
-    val finalRecommendations =
-      recommendations.filter(elem => elem._2.isValid(filterByUpdatedPropertiesEnabled))
-    finalRecommendations.collect {
-      case (key, record) => RecommendedPropertyResult(key, record.recommended.get)
-    }.toSeq.sortBy(_.property)
+  private def toRecommendationsProfileResult: Seq[TuningEntryTrait] = {
+    val recommendationEntries = if (filterByUpdatedPropertiesEnabled) {
+      recommendations.values.filter(_.isTuned())
+    } else {
+      recommendations.values.filter(_.isEnabled())
+    }
+    recommendationEntries.toSeq.sortBy(_.name)
+  }
+
+  protected def finalizeTuning(): Unit = {
+    recommendations.values.foreach(_.commit())
   }
 
   /**
    * The Autotuner loads the spark properties from either the ClusterProperties or the eventlog.
-   * 1- runs the calculation for each criterion and saves it as a [[RecommendationEntry]].
-   * 2- The final list of recommendations include any [[RecommendationEntry]] that has a
+   * 1- runs the calculation for each criterion and saves it as a [[TuningEntryTrait]].
+   * 2- The final list of recommendations includes any [[TuningEntryTrait]] that has a
    * recommendation that is different from the original property.
    * 3- Null values are excluded.
    * 4- A comment is added for each missing property in the spark property.
@@ -1143,7 +1093,7 @@ class AutoTuner(
      skipList: Option[Seq[String]] = Some(Seq()),
      limitedLogicList: Option[Seq[String]] = Some(Seq()),
      showOnlyUpdatedProps: Boolean = true):
-    (Seq[RecommendedPropertyResult], Seq[RecommendedCommentResult]) = {
+    (Seq[TuningEntryTrait], Seq[RecommendedCommentResult]) = {
    if (appInfoProvider.isAppInfoAvailable) {
      limitedLogicList.foreach(limitedSeq => limitedLogicRecommendations ++= limitedSeq)
      skipList.foreach(skipSeq => skippedRecommendations ++= skipSeq)
@@ -1172,6 +1122,7 @@ class AutoTuner(
      }
    }
    recommendFromDriverLogs()
+    finalizeTuning()
    (toRecommendationsProfileResult, toCommentProfileResult)
  }
 
@@ -1186,13 +1137,13 @@
  // Combines the original Spark properties with the recommended ones.
  def combineSparkProperties(
-      recommendedSet: Seq[RecommendedPropertyResult]): Seq[RecommendedPropertyResult] = {
+      recommendedSet: Seq[TuningEntryTrait]): Seq[RecommendedPropertyResult] = {
    // get the original properties after filtering and removing unnecessary keys
    val originalPropsFiltered = processPropKeys(getAllProperties)
    // Combine the original properties with the recommended properties.
    // The recommendations should always override the original ones
    val combinedProps = (originalPropsFiltered
-      ++ recommendedSet.map(r => r.property -> r.value).toMap).toSeq.sortBy(_._1)
+      ++ recommendedSet.map(r => r.name -> r.getTuneValue()).toMap).toSeq.sortBy(_._1)
    combinedProps.collect {
      case (pK, pV) => RecommendedPropertyResult(pK, pV)
    }
@@ -1285,19 +1236,7 @@ trait AutoTunerConfigsProvider extends Logging {
  ) ++ commentsForMissingMemoryProps
 
  // scalastyle:off line.size.limit
-  val recommendationsTarget: Seq[String] = Seq[String](
-    "spark.executor.instances",
-    "spark.rapids.sql.enabled",
-    "spark.executor.cores",
-    "spark.executor.memory",
-    "spark.rapids.sql.concurrentGpuTasks",
-    "spark.task.resource.gpu.amount",
-    "spark.sql.shuffle.partitions",
-    "spark.sql.files.maxPartitionBytes",
-    "spark.rapids.memory.pinnedPool.size",
-    "spark.executor.memoryOverhead",
-    "spark.executor.memoryOverheadFactor",
-    "spark.kubernetes.memoryOverheadFactor")
+  lazy val recommendationsTarget: Iterable[String] = TuningEntryDefinition.TUNING_TABLE.keys
 
  val classPathComments: Map[String, String] = Map(
    "rapids.jars.missing" ->
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/BootstrapReport.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/BootstrapReport.scala
new file mode 100644
index 000000000..db5527e73
--- /dev/null
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/BootstrapReport.scala
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.tool.tuning
+
+import com.nvidia.spark.rapids.tool.ToolTextFileWriter
+import org.apache.hadoop.conf.Configuration
+
+/**
+ * A class that generates the report containing only the required and tuned configurations.
+ * @param tuningResult The result of the tuning process
+ * @param outputDir The directory where the report will be written.
+ * @param hadoopConf The Hadoop configuration
+ */
+class BootstrapReport(tuningResult: TuningResult,
+    outputDir: String, hadoopConf: Configuration) {
+
+  /**
+   * Loads the bootstrap entries from the tuning result. This includes any enabled bootstrap
+   * entry that has not been removed.
+   * @return the list of bootstrap entries
+   */
+  private def loadBootstrapEntries(): Seq[TuningEntryTrait] = {
+    tuningResult.recommendations.filter(e => e.isEnabled() && e.isBootstrap() && !e.isRemoved())
+  }
+
+  def generateReport(): Unit = {
+    val textFileWriter = new ToolTextFileWriter(outputDir,
+      s"${tuningResult.appID}-bootstrap.conf",
+      s"Required and Tuned configurations to run - ${tuningResult.appID}", Option(hadoopConf))
+    try {
+      // mkString (rather than reduce) avoids an exception when no entries qualify
+      textFileWriter.write(loadBootstrapEntries().map(_.toConfString).mkString("\n"))
+    } finally {
+      textFileWriter.close()
+    }
+  }
+}
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerRunner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerRunner.scala
index cd4c74c17..5fad6ae51 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerRunner.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerRunner.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -50,6 +50,9 @@ class QualificationAutoTunerRunner(val appInfoProvider: QualAppSummaryInfoProvid
    } finally {
      textFileWriter.close()
    }
+    // Write down the recommended properties
+    val bootstrapReport = new BootstrapReport(tuningResult, outputDir, hadoopConf)
+    bootstrapReport.generateReport()
    // Write down the combined configurations
    tuningResult.combinedProps.collect {
      case combinedProps =>
@@ -77,7 +80,7 @@ class QualificationAutoTunerRunner(val appInfoProvider: QualAppSummaryInfoProvid
    // Otherwise, it is difficult to separate them logically.
    val combinedProps = autoTuner.combineSparkProperties(recommendations)
    val resultRecord = TuningResult(appInfoProvider.getAppID, recommendations,
-      comments, Option(combinedProps))
+      comments, combinedProps = Option(combinedProps))
    writeTuningReport(resultRecord, tunerContext.getOutputPath, tunerContext.hadoopConf)
    resultRecord
  }
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TunerContext.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TunerContext.scala
index bbf7fb9b6..c0f84696d 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TunerContext.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TunerContext.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,7 +29,7 @@ import org.apache.spark.sql.rapids.tool.util.RapidsToolsConfUtil
 
 case class TuningResult(
    appID: String,
-    recommendations: Seq[RecommendedPropertyResult],
+    recommendations: Seq[TuningEntryTrait],
    comments: Seq[RecommendedCommentResult],
    combinedProps: Option[Seq[RecommendedPropertyResult]] = None)
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntry.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntry.scala
new file mode 100644
index 000000000..7bf74cdb2
--- /dev/null
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntry.scala
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.tool.tuning
+
+import org.apache.spark.internal.Logging
+
+/**
+ * A wrapper to hold the tuning entry information.
+ * @param name the name of the property
+ * @param originalValue the value from the eventlog
+ * @param tunedValue the value recommended by the AutoTuner
+ * @param definition the definition of the tuning entry.
+ */
+class TuningEntry(
+    override val name: String,
+    override var originalValue: Option[String],
+    override var tunedValue: Option[String],
+    definition: Option[TuningEntryDefinition] = None) extends TuningEntryTrait {
+
+  /**
+   * Set the original value from the default value in Spark if it exists. This is needed
+   * because some properties may be left unset in the eventlog, relying on the default value
+   * defined by the Spark configuration.
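+   * For example, 'spark.sql.shuffle.partitions' carries defaultSpark "200" in the tuning
+   * table, so an eventlog that never sets it ends up with an originalValue of "200"
+   * rather than None.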
+   */
+  override def setOriginalValueFromDefaultSpark(): Unit = {
+    originalValue match {
+      case Some(_) => // Do Nothing
+      case None =>
+        definition match {
+          case Some(defn) =>
+            if (defn.hasDefaultSpark()) {
+              originalValue = Some(defn.defaultSpark)
+            }
+          case None => // Do Nothing
+        }
+    }
+  }
+
+  override def isBootstrap(): Boolean = {
+    definition match {
+      case Some(defn) => defn.isBootstrap()
+      case None => name.startsWith("spark.rapids.")
+    }
+  }
+
+  override def isEnabled(): Boolean = {
+    val globalFlag = definition match {
+      case Some(defn) => defn.isEnabled()
+      case None => true
+    }
+    globalFlag && enabled
+  }
+
+  /////////////////////////
+  // Initialization Code //
+  /////////////////////////
+
+  setOriginalValueFromDefaultSpark()
+}
+
+object TuningEntry extends Logging {
+  /**
+   * Build a TuningEntry object and automatically pull the information from the Tuning Entry
+   * Table.
+   * @param name the property label
+   * @param originalValue the original value from the eventlog
+   * @param tunedValue the value recommended by the AutoTuner
+   * @return a TuningEntry object
+   */
+  def build(
+      name: String,
+      originalValue: Option[String],
+      tunedValue: Option[String]): TuningEntry = {
+    // pull the information from the Tuning Entry Table
+    val tuningDefinition = TuningEntryDefinition.TUNING_TABLE.get(name)
+    // for debugging purposes
+    if (tuningDefinition.isEmpty) {
+      logInfo("Tuning Entry is not defined for " + name)
+    }
+    new TuningEntry(name, originalValue, tunedValue, tuningDefinition)
+  }
+}
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryDefinition.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryDefinition.scala
new file mode 100644
index 000000000..c6223e401
--- /dev/null
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryDefinition.scala
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.tool.tuning
+
+import scala.beans.BeanProperty
+
+import org.yaml.snakeyaml.{DumperOptions, LoaderOptions, Yaml}
+import org.yaml.snakeyaml.constructor.Constructor
+import org.yaml.snakeyaml.representer.Representer
+import scala.collection.JavaConverters._
+import scala.collection.breakOut
+
+import org.apache.spark.sql.rapids.tool.util.UTF8Source
+
+/**
+ * A wrapper to hold the definition of a tuning entry loaded from the tuning table.
+ * @param label the property name
+ * @param description used to explain the importance of that property and how it is used
+ * @param enabled global flag to enable/disable the tuning entry. This is used to turn off a
+ *                tuning entry
+ * @param level This is used to group the tuning entries (job/cluster)
+ * @param category Indicates the purpose of that property for RAPIDS.
+ *                 "functionality": required to enable RAPIDS
+ *                 "tuning": required to tune the runtime.
+ * @param bootstrapEntry When true, the property should be added to the bootstrap configuration.
+ *                       Default is true.
+ * @param defaultSpark The default value of the property in Spark. This is used to set the
+ *                     originalValue of the property in case it is not set by the eventlog.
+ */
+class TuningEntryDefinition(
+    @BeanProperty var label: String,
+    @BeanProperty var description: String,
+    @BeanProperty var enabled: Boolean,
+    @BeanProperty var level: String,
+    @BeanProperty var category: String,
+    @BeanProperty var bootstrapEntry: Boolean,
+    @BeanProperty var defaultSpark: String) {
+  def this() = {
+    this(label = "", description = "", enabled = true, level = "", category = "",
+      bootstrapEntry = true, defaultSpark = null)
+  }
+
+  def isEnabled(): Boolean = {
+    enabled
+  }
+
+  def isBootstrap(): Boolean = {
+    bootstrapEntry || label.startsWith("spark.rapids.")
+  }
+
+  /**
+   * Indicates if the property has a default value in Spark. This implies that the default value
+   * can be used to set the original value of the property.
+   * @return true if the property has a default value in Spark.
+   */
+  def hasDefaultSpark(): Boolean = {
+    defaultSpark != null
+  }
+}
+
+class TuningEntries(
+    @BeanProperty var tuningDefinitions: java.util.List[TuningEntryDefinition]) {
+  def this() = {
+    this(new java.util.ArrayList[TuningEntryDefinition]())
+  }
+}
+
+object TuningEntryDefinition {
+  // A static Map between the propertyName and the TuningEntryDefinition
+  lazy val TUNING_TABLE: Map[String, TuningEntryDefinition] = loadTable()
+
+  /**
+   * Load the tuning table from the yaml file.
+   * @return a map between property name and the TuningEntryDefinition
+   */
+  private def loadTable(): Map[String, TuningEntryDefinition] = {
+    val yamlSource =
+      UTF8Source.fromResource("bootstrap/tuningTable.yaml").mkString
+    val representer = new Representer(new DumperOptions())
+    representer.getPropertyUtils.setSkipMissingProperties(true)
+    val constructor = new Constructor(classOf[TuningEntries], new LoaderOptions())
+    val yamlObjNested = new Yaml(constructor, representer)
+    val entryTable: TuningEntries = yamlObjNested.load(yamlSource).asInstanceOf[TuningEntries]
+    // load the enabled entries.
+    entryTable.tuningDefinitions.asScala.collect {
+      case e if e.isEnabled() => (e.label, e)
+    }(breakOut)
+  }
+}
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala
new file mode 100644
index 000000000..f4ca1c9c9
--- /dev/null
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningEntryTrait.scala
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.tool.tuning
+
+import scala.collection.mutable.ListBuffer
+
+import com.nvidia.spark.rapids.tool.tuning.TuningOpTypes.TuningOpType
+
+import org.apache.spark.sql.rapids.tool.util.StringUtils
+
+/**
+ * A trait that defines the behavior of the Tuning Entry.
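+ * An entry tracks the original value of a property (from the eventlog or the Spark default),
+ * the value recommended by the AutoTuner, and the tuning operation that relates the two
+ * (see [[TuningOpTypes]]).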
+ */
+trait TuningEntryTrait {
+  val name: String
+  // The value recommended by the AutoTuner
+  var tunedValue: Option[String]
+  // The original value of the property from the event log
+  var originalValue: Option[String]
+  var enabled: Boolean = true
+
+  // The type of tuning operation to be performed
+  var tuningOpType: TuningOpType = TuningOpTypes.UNKNOWN
+  // Comments specific to the property
+  val comments: ListBuffer[String] = ListBuffer[String]()
+  // The value to be used when the property is unresolved.
+  var fillUnresolved: Option[String] = Some("[FILL_IN_VALUE]")
+
+  def toConfString: String = {
+    "--conf %s=%s".format(name, getTuneValue())
+  }
+
+  def setOriginalValueFromDefaultSpark(): Unit
+
+  /**
+   * Marks the property as unresolved, i.e., processed by the AutoTuner without producing a
+   * final value. This is used to distinguish the properties that were not tuned due to a
+   * failure in the AutoTuner. For example, the AutoTuner may fail to find the shuffleManager
+   * className, but we still want to include the property in the bootstrap configuration.
+   * @param fill the value to be used to fill in the gap if necessary.
+   */
+  def markAsUnresolved(fill: Option[String] = None): Unit = {
+    setTuningOpType(TuningOpTypes.UNRESOLVED)
+    if (fill.isDefined) {
+      fillUnresolved = fill
+    }
+  }
+
+  /**
+   * Disables the property. This is used to turn off the property if it is not applicable.
+   */
+  def disable(): Unit = {
+    enabled = false
+  }
+
+  /**
+   * Returns the value of the property as a string.
+   * When the tunedValue is not set, it is returned as [FILL_IN_VALUE] so that it can be
+   * replaced by the user. This is used because the AutoTuner may not be able to successfully
+   * make recommendations. Yet, we want to include that in the final tuning report.
+   * Note that the name is not "getTunedValue" because the purpose is different.
+   *
+   * @param fillIfBlank the value of the content if the TunedValue is empty
+   * @return the value of the property as a string.
+   */
+  def getTuneValue(fillIfBlank: Option[String] = None): String = {
+    if (isUnresolved()) {
+      fillIfBlank.getOrElse(fillUnresolved.get)
+    } else {
+      // It is possible that the property was not tuned. However, we should not be in that
+      // case because by calling commit we must have copied the tuned value from the original.
+      tunedValue.getOrElse(fillIfBlank.getOrElse(originalValue.getOrElse("[UNDEFINED]")))
+    }
+  }
+
+  /**
+   * Indicates that a specific configuration should be removed from the configuration.
+   */
+  def markAsRemoved(): Unit = {
+    tunedValue = None
+    setTuningOpType(TuningOpTypes.REMOVE)
+  }
+
+  def setRecommendedValue(value: String): Unit = {
+    tunedValue = Option(value)
+    updateOpType()
+  }
+
+  /**
+   * Indicates if the property is tuned. This is used to filter out the entries that stayed
+   * the same.
+   * @return true if it was changed by the AutoTuner or false otherwise.
+   */
+  def isTuned(): Boolean = {
+    isEnabled() && TuningOpTypes.isTuned(tuningOpType)
+  }
+
+  def isUnresolved(): Boolean = {
+    tuningOpType == TuningOpTypes.UNRESOLVED
+  }
+
+  /**
+   * Indicates if the property is removed by the AutoTuner.
+   */
+  def isRemoved(): Boolean = {
+    tuningOpType == TuningOpTypes.REMOVE
+  }
+
+  /**
+   * Indicates if the property is a bootstrap property.
+   * A bootstrap property is a property that is required to be set by the AutoTuner.
+   */
+  def isBootstrap(): Boolean
+
+  /**
+   * Indicates if the property is enabled.
+   */
+  def isEnabled(): Boolean
+
+  /**
+   * Used to compare between two properties by converting memory units to equivalent
+   * representations.
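+   * For example, "2g" and "2048m" both convert to "2147483648" bytes, so they compare
+   * as equal.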
+   * @param propValue property to be processed.
+   * @return the uniform representation of property.
+   *         For Memory, the value is converted to bytes.
+   */
+  private def getRawValue(propValue: Option[String]): Option[String] = {
+    propValue match {
+      case None => None
+      case Some(value) =>
+        if (StringUtils.isMemorySize(value)) {
+          // if it is memory, return the bytes unit
+          Some(s"${StringUtils.convertMemorySizeToBytes(value)}")
+        } else {
+          propValue
+        }
+    }
+  }
+
+  def setTuningOpType(opType: TuningOpType): Unit = {
+    tuningOpType = opType
+  }
+
+  /**
+   * Updates the tuning operation type based on the original and tuned values.
+   */
+  def updateOpType(): Unit = {
+    if (!(isRemoved() || isUnresolved())) {
+      val originalVal = getRawValue(originalValue)
+      val recommendedVal = getRawValue(tunedValue)
+      (originalVal, recommendedVal) match {
+        case (None, None) => setTuningOpType(TuningOpTypes.UNKNOWN)
+        case (Some(orig), Some(rec)) =>
+          if (orig != rec) {
+            setTuningOpType(TuningOpTypes.UPDATE)
+          } else {
+            setTuningOpType(TuningOpTypes.CLONE)
+          }
+        case (None, Some(_)) => setTuningOpType(TuningOpTypes.ADD)
+        case (Some(orig), None) =>
+          // If the property was not set by the AutoTuner, it should be copied over
+          // from the original configuration.
+          setRecommendedValue(orig)
+      }
+    }
+  }
+
+  def commit(): Unit = {
+    updateOpType()
+  }
+}
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningOpTypes.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningOpTypes.scala
new file mode 100644
index 000000000..9b81729e4
--- /dev/null
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningOpTypes.scala
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.tool.tuning
+
+/**
+ * Enumerated type to define the different modifications that the AutoTuner performs on
+ * a specific property.
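+ * For example, recommending a value for a property that is missing from the eventlog is an
+ * ADD, while recommending the same value that was originally set is a CLONE.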
+ */
+object TuningOpTypes extends Enumeration {
+  type TuningOpType = Value
+  val ADD, // the property is added
+    REMOVE, // the property is removed
+    UPDATE, // the property is updated
+    CLONE, // the property is the same
+    UNRESOLVED, // the property is processed by the AutoTuner but the value is not resolved
+    UNKNOWN = Value
+
+  def isTuned(tuningOpType: TuningOpType): Boolean = {
+    tuningOpType == ADD || tuningOpType == UPDATE ||
+      tuningOpType == REMOVE || tuningOpType == UNRESOLVED
+  }
+}
diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
index 19100b534..f1cd67f9c 100644
--- a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
+++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala
@@ -80,6 +80,7 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
          |--conf spark.executor.instances=2
          |--conf spark.executor.memory=32768m
          |--conf spark.executor.memoryOverhead=17612m
+         |--conf spark.locality.wait=0
          |--conf spark.rapids.memory.pinnedPool.size=4096m
          |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
          |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -91,9 +92,9 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
          |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
          |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark320.RapidsShuffleManager
          |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+         |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
          |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
          |--conf spark.sql.files.maxPartitionBytes=512m
-         |--conf spark.sql.shuffle.partitions=200
          |--conf spark.task.resource.gpu.amount=0.001
          |
          |Comments:
@@ -113,10 +114,8 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
          |- 'spark.shuffle.manager' was not set.
          |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
          |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
-         |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
          |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
          |- 'spark.sql.files.maxPartitionBytes' was not set.
-         |- 'spark.sql.shuffle.partitions' was not set.
          |- 'spark.task.resource.gpu.amount' was not set.
          |- Number of workers is missing. Setting default to 1.
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -135,10 +134,10 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
     val expectedResults =
       s"""|
          |Spark Properties:
+         |--conf spark.locality.wait=0
          |--conf spark.rapids.sql.batchSizeBytes=2147483647
          |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
          |--conf spark.sql.files.maxPartitionBytes=512m
-         |--conf spark.sql.shuffle.partitions=200
          |
          |Comments:
          |- 'spark.executor.cores' should be set to 16.
@@ -151,7 +150,6 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
          |- 'spark.shuffle.manager' was not set.
          |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
          |- 'spark.sql.files.maxPartitionBytes' was not set.
-         |- 'spark.sql.shuffle.partitions' was not set.
          |- 'spark.task.resource.gpu.amount' should be set to 0.001.
          |- Could not infer the cluster configuration, recommendations are generated using default values!
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -171,10 +169,10 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
     val expectedResults =
       s"""|
          |Spark Properties:
+         |--conf spark.locality.wait=0
          |--conf spark.rapids.sql.batchSizeBytes=2147483647
          |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
          |--conf spark.sql.files.maxPartitionBytes=512m
-         |--conf spark.sql.shuffle.partitions=200
          |
          |Comments:
          |- 'spark.executor.cores' should be set to 16.
@@ -187,7 +185,6 @@ class ProfilingAutoTunerSuite extends BaseAutoTunerSuite {
          |- 'spark.shuffle.manager' was not set.
          |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
          |- 'spark.sql.files.maxPartitionBytes' was not set.
-         |- 'spark.sql.shuffle.partitions' was not set.
          |- 'spark.task.resource.gpu.amount' should be set to 0.001.
          |- Could not infer the cluster configuration, recommendations are generated using default values!
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -226,10 +223,10 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
     val expectedResults =
       s"""|
          |Spark Properties:
+         |--conf spark.locality.wait=0
          |--conf spark.rapids.sql.batchSizeBytes=2147483647
          |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
          |--conf spark.sql.files.maxPartitionBytes=512m
-         |--conf spark.sql.shuffle.partitions=200
          |
          |Comments:
          |- 'spark.executor.cores' should be set to 16.
@@ -242,7 +239,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |- 'spark.shuffle.manager' was not set.
          |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
          |- 'spark.sql.files.maxPartitionBytes' was not set.
-         |- 'spark.sql.shuffle.partitions' was not set.
          |- 'spark.task.resource.gpu.amount' should be set to 0.001.
          |- Could not infer the cluster configuration, recommendations are generated using default values!
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -270,6 +266,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |--conf spark.executor.instances=8
          |--conf spark.executor.memory=32768m
          |--conf spark.executor.memoryOverhead=17612m
+         |--conf spark.locality.wait=0
          |--conf spark.rapids.memory.pinnedPool.size=4096m
          |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
          |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -281,9 +278,9 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
          |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
          |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+         |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
          |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
          |--conf spark.sql.files.maxPartitionBytes=512m
-         |--conf spark.sql.shuffle.partitions=200
          |--conf spark.task.resource.gpu.amount=0.001
          |
          |Comments:
@@ -303,10 +300,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |- 'spark.shuffle.manager' was not set.
          |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
          |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
-         |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
          |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
          |- 'spark.sql.files.maxPartitionBytes' was not set.
-         |- 'spark.sql.shuffle.partitions' was not set.
          |- 'spark.task.resource.gpu.amount' was not set.
          |- Number of workers is missing. Setting default to 1.
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -333,6 +328,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |--conf spark.executor.instances=2
          |--conf spark.executor.memory=32768m
          |--conf spark.executor.memoryOverhead=17612m
+         |--conf spark.locality.wait=0
          |--conf spark.rapids.memory.pinnedPool.size=4096m
          |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
          |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -344,9 +340,9 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
          |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
          |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+         |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
          |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
          |--conf spark.sql.files.maxPartitionBytes=512m
-         |--conf spark.sql.shuffle.partitions=200
          |--conf spark.task.resource.gpu.amount=0.001
          |
          |Comments:
@@ -366,10 +362,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |- 'spark.shuffle.manager' was not set.
          |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
          |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
-         |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
          |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
          |- 'spark.sql.files.maxPartitionBytes' was not set.
-         |- 'spark.sql.shuffle.partitions' was not set.
          |- 'spark.task.resource.gpu.amount' was not set.
          |- Number of workers is missing. Setting default to 1.
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -406,6 +400,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |Spark Properties:
          |--conf spark.executor.instances=4
          |--conf spark.executor.memoryOverhead=17612m
+         |--conf spark.locality.wait=0
          |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
          |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
          |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -415,8 +410,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
          |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
          |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
          |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+         |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
          |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
-         |--conf spark.sql.shuffle.partitions=200
          |
          |Comments:
          |- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -429,8 +424,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |- 'spark.shuffle.manager' was not set.
          |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
          |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
-         |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
-         |- 'spark.sql.shuffle.partitions' was not set.
          |- GPU count is missing. Setting default to 1.
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
          |""".stripMargin
@@ -470,6 +463,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |Spark Properties:
          |--conf spark.executor.instances=8
          |--conf spark.executor.memoryOverhead=17612m
+         |--conf spark.locality.wait=0
          |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
          |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
          |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -478,8 +472,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
          |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
          |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+         |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
          |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
-         |--conf spark.sql.shuffle.partitions=200
          |
          |Comments:
          |- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -488,8 +482,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
          |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
          |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
-         |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
-         |- 'spark.sql.shuffle.partitions' was not set.
          |- GPU memory is missing. Setting default to 15109m.
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -530,6 +522,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |Spark Properties:
          |--conf spark.executor.instances=8
          |--conf spark.executor.memoryOverhead=17612m
+         |--conf spark.locality.wait=0
          |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
          |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
          |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -537,7 +530,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
          |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
          |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
-         |--conf spark.sql.shuffle.partitions=200
+         |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
          |
          |Comments:
          |- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -545,7 +538,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
          |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
          |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
-         |- 'spark.sql.shuffle.partitions' was not set.
          |- GPU memory is missing. Setting default to 15109m.
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -583,6 +575,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |Spark Properties:
          |--conf spark.executor.instances=8
          |--conf spark.executor.memoryOverhead=17612m
+         |--conf spark.locality.wait=0
          |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
          |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
          |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -591,8 +584,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
          |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
          |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+         |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
          |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
-         |--conf spark.sql.shuffle.partitions=200
          |
          |Comments:
          |- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -601,8 +594,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
          |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
          |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
-         |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
-         |- 'spark.sql.shuffle.partitions' was not set.
          |- GPU device is missing. Setting default to $T4Gpu.
          |- GPU memory is missing. Setting default to ${T4Gpu.getMemory}.
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -642,6 +633,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |Spark Properties:
          |--conf spark.executor.instances=8
          |--conf spark.executor.memoryOverhead=17612m
+         |--conf spark.locality.wait=0
          |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
          |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
          |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -650,8 +642,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
          |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
          |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+         |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
          |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
-         |--conf spark.sql.shuffle.partitions=200
          |
          |Comments:
          |- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -660,8 +652,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
          |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
          |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
          |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
-         |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
-         |- 'spark.sql.shuffle.partitions' was not set.
          |- GPU device is missing. Setting default to $T4Gpu.
          |- GPU memory is missing. Setting default to ${T4Gpu.getMemory}.
          |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -691,6 +681,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=2
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -702,9 +693,9 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
 |--conf spark.task.resource.gpu.amount=0.001
 |
 |Comments:
@@ -724,10 +715,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.shuffle.manager' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- 'spark.task.resource.gpu.amount' was not set.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -757,6 +746,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |Spark Properties:
 |--conf spark.executor.instances=8
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -765,8 +755,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
 |
 |Comments:
 |- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -775,8 +765,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
 |""".stripMargin
@@ -816,6 +804,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.cores=16
 |--conf spark.executor.instances=4
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -824,8 +813,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
 |
 |Comments:
 |- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -834,8 +823,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
 |""".stripMargin
@@ -860,6 +847,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=8
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -871,9 +859,9 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
 |--conf spark.task.resource.gpu.amount=0.001
 |
 |Comments:
@@ -893,10 +881,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.shuffle.manager' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- 'spark.task.resource.gpu.amount' was not set.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -962,6 +948,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -972,6 +959,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=4096m
 |
@@ -983,7 +971,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -1039,6 +1026,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=5
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1049,6 +1037,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=4096m
 |
@@ -1060,7 +1049,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -1107,17 +1095,20 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
 |--conf spark.rapids.sql.batchSizeBytes=2147483647
 |--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
 |--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=4096m
 |
@@ -1127,13 +1118,13 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.shuffle.multiThreaded.reader.threads' was not set.
 |- 'spark.rapids.shuffle.multiThreaded.writer.threads' was not set.
 |- 'spark.rapids.sql.batchSizeBytes' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
 |- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
 |- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.shuffle.manager' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -1184,6 +1175,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1196,6 +1188,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=4096m
 |
@@ -1211,7 +1204,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.shuffle.manager' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -1267,6 +1259,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1277,6 +1270,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.files.maxPartitionBytes=3669m
 |--conf spark.task.resource.gpu.amount=0.001
 |
@@ -1345,6 +1339,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1355,6 +1350,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.files.maxPartitionBytes=3669m
 |--conf spark.task.resource.gpu.amount=0.001
 |
@@ -1418,6 +1414,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1428,6 +1425,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.files.maxPartitionBytes=3669m
 |--conf spark.task.resource.gpu.amount=0.001
 |
@@ -1483,6 +1481,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |Spark Properties:
 |--conf spark.executor.instances=8
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -1491,8 +1490,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
 |
 |Comments:
 |- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -1501,8 +1500,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.multiple")} [23.06.0, 23.02.1]
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
 |""".stripMargin
@@ -1529,6 +1526,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |Spark Properties:
 |--conf spark.executor.instances=8
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -1537,8 +1535,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
 |
 |Comments:
 |- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -1547,8 +1545,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- A newer RAPIDS Accelerator for Apache Spark plugin is available:
 |  $pluginJarMvnURl
 |  Version used in application is $jarVer.
@@ -1572,6 +1568,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |Spark Properties:
 |--conf spark.executor.instances=8
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
@@ -1580,8 +1577,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
- |--conf spark.sql.shuffle.partitions=200
 |
 |Comments:
 |- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
@@ -1590,8 +1587,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
 |""".stripMargin
 val rapidsJarsArr = Seq(s"rapids-4-spark_2.12-$latestRelease.jar")
@@ -1641,6 +1636,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.filecache.enabled=true
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
@@ -1652,6 +1648,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.files.maxPartitionBytes=3669m
 |--conf spark.task.resource.gpu.amount=0.001
 |
@@ -1716,6 +1713,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1726,6 +1724,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.files.maxPartitionBytes=3669m
 |--conf spark.task.resource.gpu.amount=0.001
 |
@@ -1757,7 +1756,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 val (properties, comments) = autoTuner.getRecommendedProperties()
 // Assert recommendations are excluded in properties
- assert(properties.map(_.property).forall(autoTuner.platform.isValidRecommendation))
+ assert(properties.map(_.name).forall(autoTuner.platform.isValidRecommendation))
 // Assert recommendations are skipped in comments
 assert(comments.map(_.comment).forall(autoTuner.platform.isValidComment))
 }
@@ -1811,6 +1810,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |Spark Properties:
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -1821,6 +1821,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=4096m
 |
@@ -1831,7 +1832,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -1921,6 +1921,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=8
 |--conf spark.executor.memory=32768m
 |--conf $memoryOverheadLabel=13516m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=24
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=24
@@ -1929,6 +1930,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=32
 |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=4096m
 |
@@ -1942,7 +1944,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.shuffle.manager' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -1989,6 +1990,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=8
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=13516m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=24
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=24
@@ -1997,6 +1999,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=32
 |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=4096m
 |
@@ -2009,7 +2012,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.shuffle.manager' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -2089,6 +2091,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=4
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=13516m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=24
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=24
@@ -2097,9 +2100,9 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=32
 |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
 |
 |Comments:
 |- 'spark.executor.instances' was not set.
@@ -2113,10 +2116,8 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.shuffle.manager' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
 |- ${ProfilingAutoTunerConfigsProvider.commentForExperimentalConfig("spark.rapids.sql.incompatibleDateFormats.enabled")}
@@ -2222,6 +2223,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -2246,7 +2248,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.shuffle.manager' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- Setting 'spark.sql.adaptive.autoBroadcastJoinThreshold' > 100m could lead to performance\n regression. Should be set to a lower number.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -2366,6 +2367,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
@@ -2376,6 +2378,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.files.maxPartitionBytes=3669m
 |--conf spark.task.resource.gpu.amount=0.001
 |
@@ -2573,16 +2576,19 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
 |--conf spark.rapids.sql.batchSizeBytes=2147483647
 |--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
 |--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.files.maxPartitionBytes=3669m
 |--conf spark.sql.shuffle.partitions=400
 |
@@ -2590,6 +2596,7 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.executor.memoryOverhead' was not set.
 |- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
 |- 'spark.rapids.sql.batchSizeBytes' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
 |- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
 |- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
@@ -2597,7 +2604,6 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- 'spark.sql.shuffle.partitions' should be increased since spilling occurred in shuffle stages.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- Average JVM GC time is very high. Other Garbage Collectors can be used for better performance.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
@@ -2649,30 +2655,32 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
 |--conf spark.rapids.sql.batchSizeBytes=2147483647
 |--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
 |--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.files.maxPartitionBytes=3669m
- |--conf spark.sql.shuffle.partitions=200
 |
 |Comments:
 |- 'spark.executor.memoryOverhead' was not set.
 |- 'spark.rapids.shuffle.multiThreaded.maxBytesInFlight' was not set.
 |- 'spark.rapids.sql.batchSizeBytes' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
 |- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
 |- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- Average JVM GC time is very high. Other Garbage Collectors can be used for better performance.
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
 |- Shuffle skew exists (when task's Shuffle Read Size > 3 * Avg Stage-level size) in
@@ -2714,20 +2722,22 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
 |--conf spark.kryo.registrator=com.nvidia.spark.rapids.GpuKryoRegistrator
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
 |--conf spark.rapids.sql.batchSizeBytes=2147483647
 |--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
 |--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
 |--conf spark.task.resource.gpu.amount=0.001
 |
 |Comments:
@@ -2739,16 +2749,15 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.shuffle.multiThreaded.writer.threads' was not set.
 |- 'spark.rapids.sql.batchSizeBytes' was not set.
 |- 'spark.rapids.sql.concurrentGpuTasks' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
 |- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
 |- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.shuffle.manager' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- 'spark.task.resource.gpu.amount' was not set.
 |- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -2789,20 +2798,22 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
 |--conf spark.kryo.registrator=org.apache.SomeRegistrator,org.apache.SomeOtherRegistrator,com.nvidia.spark.rapids.GpuKryoRegistrator
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
 |--conf spark.rapids.sql.batchSizeBytes=2147483647
 |--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
 |--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
 |--conf spark.task.resource.gpu.amount=0.001
 |
 |Comments:
@@ -2813,16 +2824,15 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.shuffle.multiThreaded.writer.threads' was not set.
 |- 'spark.rapids.sql.batchSizeBytes' was not set.
 |- 'spark.rapids.sql.concurrentGpuTasks' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
 |- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
 |- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.shuffle.manager' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- 'spark.task.resource.gpu.amount' was not set.
 |- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -2863,20 +2873,22 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
 |--conf spark.kryo.registrator=com.nvidia.spark.rapids.GpuKryoRegistrator
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
 |--conf spark.rapids.sql.batchSizeBytes=2147483647
 |--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
 |--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark$testSmVersion.RapidsShuffleManager
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
 |--conf spark.task.resource.gpu.amount=0.001
 |
 |Comments:
@@ -2887,16 +2899,15 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.shuffle.multiThreaded.writer.threads' was not set.
 |- 'spark.rapids.sql.batchSizeBytes' was not set.
 |- 'spark.rapids.sql.concurrentGpuTasks' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
 |- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
 |- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.shuffle.manager' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- 'spark.task.resource.gpu.amount' was not set.
 |- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.jars.missing")}
@@ -2936,20 +2947,22 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |--conf spark.executor.instances=10
 |--conf spark.executor.memory=32768m
 |--conf spark.executor.memoryOverhead=17612m
+ |--conf spark.locality.wait=0
 |--conf spark.rapids.memory.pinnedPool.size=4096m
 |--conf spark.rapids.shuffle.multiThreaded.maxBytesInFlight=4g
 |--conf spark.rapids.shuffle.multiThreaded.reader.threads=28
 |--conf spark.rapids.shuffle.multiThreaded.writer.threads=28
 |--conf spark.rapids.sql.batchSizeBytes=2147483647
 |--conf spark.rapids.sql.concurrentGpuTasks=2
+ |--conf spark.rapids.sql.enabled=true
 |--conf spark.rapids.sql.format.parquet.multithreaded.combine.waitTime=1000
 |--conf spark.rapids.sql.multiThreadedRead.numThreads=80
 |--conf spark.rapids.sql.reader.multithreaded.combine.sizeBytes=10485760
 |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark341.RapidsShuffleManager
 |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.autoBroadcastJoinThreshold=[FILL_IN_VALUE]
 |--conf spark.sql.adaptive.coalescePartitions.minPartitionSize=4m
 |--conf spark.sql.files.maxPartitionBytes=512m
- |--conf spark.sql.shuffle.partitions=200
 |--conf spark.task.resource.gpu.amount=0.001
 |
 |Comments:
@@ -2960,16 +2973,15 @@ We recommend using nodes/workers with more memory. Need at least 17496MB memory.
 |- 'spark.rapids.shuffle.multiThreaded.writer.threads' was not set.
 |- 'spark.rapids.sql.batchSizeBytes' was not set.
 |- 'spark.rapids.sql.concurrentGpuTasks' was not set.
+ |- 'spark.rapids.sql.enabled' was not set.
 |- 'spark.rapids.sql.format.parquet.multithreaded.combine.waitTime' was not set.
 |- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
 |- 'spark.rapids.sql.reader.multithreaded.combine.sizeBytes' was not set.
 |- 'spark.shuffle.manager' was not set.
 |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
 |- 'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.
- |- 'spark.sql.adaptive.coalescePartitions.minPartitionSize' was not set.
 |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
 |- 'spark.sql.files.maxPartitionBytes' was not set.
- |- 'spark.sql.shuffle.partitions' was not set.
 |- 'spark.task.resource.gpu.amount' was not set.
 |- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
 |- ${ProfilingAutoTunerConfigsProvider.classPathComments("rapids.shuffle.jars")}
diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerSuite.scala
index c6891d636..a65ddd064 100644
--- a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerSuite.scala
+++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/QualificationAutoTunerSuite.scala
@@ -76,14 +76,18 @@ class QualificationAutoTunerSuite extends BaseAutoTunerSuite {
     assert(expectedResults.forall(autoTunerOutput.contains))
   }
-  test("test AutoTuner for Qualification sets shuffle partitions to 200") {
-    val autoTuner = buildDefaultAutoTuner()
+  test("test AutoTuner for Qualification should not change shuffle partitions") {
+    // Set shuffle partitions to 100. The AutoTuner should keep the same value because
+    // 'spark.sql.shuffle.partitions' is currently one of the limitedLogicRecommendations,
+    // and a limited-logic property is not added to the recommendations when its value
+    // has not changed.
+    val autoTuner = buildDefaultAutoTuner(
+      defaultSparkProps ++ mutable.Map("spark.sql.shuffle.partitions" -> "100")
+    )
     val (properties, comments) = autoTuner.getRecommendedProperties()
     val autoTunerOutput = Profiler.getAutoTunerResultsAsString(properties, comments)
     val expectedResults = Seq(
-      "--conf spark.sql.shuffle.partitions=200",
-      "- 'spark.sql.shuffle.partitions' was not set."
+      "--conf spark.sql.shuffle.partitions"
     )
-    assert(expectedResults.forall(autoTunerOutput.contains))
+    assert(expectedResults.forall(t => !autoTunerOutput.contains(t)))
   }
 }
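For context on the behavior the rewritten test encodes: a property on the limitedLogicRecommendations list is only surfaced as a recommendation when the tuned value differs from what the application already set. Below is a minimal, self-contained Scala sketch of that filtering rule; all names in it (LimitedLogicSketch, limitedLogic, recommend) are hypothetical stand-ins, not the AutoTuner's actual internals.

  // Sketch: skip "limited logic" properties whose value did not change.
  object LimitedLogicSketch {
    // Hypothetical stand-in for the AutoTuner's limitedLogicRecommendations list.
    val limitedLogic: Set[String] = Set("spark.sql.shuffle.partitions")

    // Keep a tuned property unless it is limited-logic AND its value is unchanged.
    def recommend(appProps: Map[String, String],
                  tuned: Map[String, String]): Seq[(String, String)] =
      tuned.toSeq.sortBy(_._1).filterNot { case (k, v) =>
        limitedLogic.contains(k) && appProps.get(k).contains(v)
      }

    def main(args: Array[String]): Unit = {
      val appProps = Map("spark.sql.shuffle.partitions" -> "100")
      val tuned = Map(
        "spark.sql.shuffle.partitions" -> "100", // unchanged -> filtered out
        "spark.locality.wait" -> "0")            // new recommendation -> kept
      recommend(appProps, tuned).foreach { case (k, v) => println(s"--conf $k=$v") }
      // Prints only: --conf spark.locality.wait=0
    }
  }

Under a rule like this, the negated assertion in the test holds: with 'spark.sql.shuffle.partitions' already at 100 and no change recommended, no '--conf spark.sql.shuffle.partitions' line appears in the AutoTuner output.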