Generate a separate file to list bootstrap properties #1517

Merged on Jan 31, 2025 (8 commits)
Changes from 7 commits
158 changes: 158 additions & 0 deletions core/src/main/resources/bootstrap/tuningTable.yaml
@@ -0,0 +1,158 @@
# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

tuningDefinitions:
- label: spark.executor.cores
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.executor.instances
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.executor.memory
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.executor.memoryOverhead
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.executor.memoryOverheadFactor
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.kubernetes.memoryOverheadFactor
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.locality.wait
description: 'The time to wait to launch a data-local task before giving up and launching it on a less-local node. It is recommended to avoid waiting for a data-local task.'
enabled: true
level: cluster
category: tuning
defaultSpark: 3s
- label: spark.rapids.filecache.enabled
description: ''
enabled: true
level: job
- label: spark.rapids.memory.pinnedPool.size
description: ''
enabled: true
level: cluster
- label: spark.rapids.shuffle.multiThreaded.maxBytesInFlight
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.rapids.shuffle.multiThreaded.reader.threads
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.rapids.shuffle.multiThreaded.writer.threads
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.rapids.sql.batchSizeBytes
description: ''
enabled: true
level: job
category: tuning
- label: spark.rapids.sql.concurrentGpuTasks
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.rapids.sql.format.parquet.multithreaded.combine.waitTime
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.rapids.sql.enabled
description: 'Should be true to enable SQL operations on the GPU.'
enabled: true
level: cluster
category: functionality
- label: spark.rapids.sql.multiThreadedRead.numThreads
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.rapids.sql.reader.multithreaded.combine.sizeBytes
description: ''
enabled: true
level: cluster
category: tuning
- label: spark.shuffle.manager
description: ''
enabled: true
level: cluster
- label: spark.sql.adaptive.enabled
description: ''
enabled: true
level: job
category: tuning
- label: spark.sql.adaptive.advisoryPartitionSizeInBytes
description: ''
enabled: true
level: job
category: tuning
- label: spark.sql.adaptive.coalescePartitions.initialPartitionNum
description: ''
enabled: true
level: job
category: tuning
- label: spark.sql.adaptive.coalescePartitions.minPartitionNum
description: ''
enabled: true
level: job
category: tuning
- label: spark.sql.adaptive.coalescePartitions.minPartitionSize
description: ''
enabled: true
level: job
category: tuning
- label: spark.sql.adaptive.coalescePartitions.parallelismFirst
description: ''
enabled: true
level: job
category: tuning
- label: spark.sql.adaptive.autoBroadcastJoinThreshold
description: ''
enabled: true
level: job
category: tuning
- label: spark.sql.files.maxPartitionBytes
description: ''
enabled: true
level: job
category: tuning
- label: spark.sql.shuffle.partitions
description: ''
enabled: true
level: job
category: tuning
- label: spark.task.resource.gpu.amount
description: ''
enabled: true
level: cluster
category: tuning
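
For reference, each entry in the table above is a flat record: label, description, enabled, level (cluster or job), and optional category and defaultSpark fields. Below is a minimal Scala sketch of how such a file could be read with SnakeYAML; the `TuningDefinition` case class and `TuningTableLoader` object are illustrative names, not the tool's actual loader.

```scala
import java.io.InputStream

import scala.jdk.CollectionConverters._ // Scala 2.13+; use scala.collection.JavaConverters on 2.12

import org.yaml.snakeyaml.Yaml

// Hypothetical model of one entry in tuningTable.yaml above.
case class TuningDefinition(
    label: String,
    description: String,
    enabled: Boolean,
    level: String,                // "cluster" or "job"
    category: Option[String],     // "tuning", "functionality", or absent
    defaultSpark: Option[String]) // e.g. "3s" for spark.locality.wait

object TuningTableLoader {
  /** Parse the `tuningDefinitions` list from a YAML stream. */
  def load(in: InputStream): Seq[TuningDefinition] = {
    val root = new Yaml().load[java.util.Map[String, Object]](in)
    val rawEntries = root.get("tuningDefinitions")
      .asInstanceOf[java.util.List[java.util.Map[String, Object]]]
    rawEntries.asScala.toSeq.map { m =>
      TuningDefinition(
        label = m.get("label").toString,
        description = Option(m.get("description")).map(_.toString).getOrElse(""),
        enabled = Option(m.get("enabled")).exists(_.toString.toBoolean),
        level = m.get("level").toString,
        category = Option(m.get("category")).map(_.toString),
        defaultSpark = Option(m.get("defaultSpark")).map(_.toString))
    }
  }
}
```

A caller could then load the bundled resource with something like `TuningTableLoader.load(getClass.getResourceAsStream("/bootstrap/tuningTable.yaml"))`.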
Profiler.scala
@@ -23,7 +23,7 @@ import scala.collection.mutable.{ArrayBuffer, HashMap}
import scala.util.control.NonFatal

import com.nvidia.spark.rapids.tool.{AppSummaryInfoBaseProvider, EventLogInfo, EventLogPathProcessor, FailedEventLog, Platform, PlatformFactory, ToolBase}
-import com.nvidia.spark.rapids.tool.tuning.{AutoTuner, ProfilingAutoTunerConfigsProvider}
+import com.nvidia.spark.rapids.tool.tuning.{AutoTuner, ProfilingAutoTunerConfigsProvider, TuningEntryTrait}
import com.nvidia.spark.rapids.tool.views._
import org.apache.hadoop.conf.Configuration

@@ -416,7 +416,7 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolean
*/
private def runAutoTuner(appInfo: Option[ApplicationSummaryInfo],
driverInfoProvider: DriverLogInfoProvider = BaseDriverLogInfoProvider.noneDriverLog)
-: (Seq[RecommendedPropertyResult], Seq[RecommendedCommentResult]) = {
+: (Seq[TuningEntryTrait], Seq[RecommendedCommentResult]) = {
// only run the auto tuner on GPU event logs for profiling tool right now. There are
// assumptions made in the code
if (appInfo.isDefined && appInfo.get.appInfo.head.pluginEnabled) {
@@ -611,10 +611,10 @@ object Profiler {
val COMBINED_LOG_FILE_NAME_PREFIX = "rapids_4_spark_tools_combined"
val SUBDIR = "rapids_4_spark_profile"

-def getAutoTunerResultsAsString(props: Seq[RecommendedPropertyResult],
+def getAutoTunerResultsAsString(props: Seq[TuningEntryTrait],
comments: Seq[RecommendedCommentResult]): String = {
val propStr = if (props.nonEmpty) {
-val propertiesToStr = props.map(_.toString).reduce(_ + "\n" + _)
+val propertiesToStr = props.map(_.toConfString).reduce(_ + "\n" + _)
s"\nSpark Properties:\n$propertiesToStr\n"
} else {
"Cannot recommend properties. See Comments.\n"
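
For context on the `toConfString` change above: recommendations are now returned as `TuningEntryTrait` values that render themselves, instead of pre-formatted `RecommendedPropertyResult` objects. The sketch below shows one plausible shape of such an entry; the trait and member names are assumptions for illustration, not the actual `TuningEntryTrait` API.

```scala
// Illustrative-only sketch; the real TuningEntryTrait lives in
// com.nvidia.spark.rapids.tool.tuning and may differ.
trait TuningEntrySketch {
  def name: String
  def recommendedValue: Option[String]

  /** One plausible rendering: a `--conf key=value` line (assumption; the
   *  tool's actual output format may differ). */
  def toConfString: String = s"--conf $name=${recommendedValue.getOrElse("")}"
}

case class SimpleTuningEntry(name: String, recommendedValue: Option[String])
  extends TuningEntrySketch

object TuningEntryDemo extends App {
  val props: Seq[TuningEntrySketch] = Seq(
    SimpleTuningEntry("spark.executor.cores", Some("16")),
    SimpleTuningEntry("spark.sql.shuffle.partitions", Some("200")))
  // Mirrors getAutoTunerResultsAsString: join each rendered entry with newlines.
  println(props.map(_.toConfString).reduce(_ + "\n" + _))
}
```

The point of the interface change is that formatting logic lives with the entry itself, so `getAutoTunerResultsAsString` only needs to concatenate the rendered lines.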