diff --git a/.github/workflows/markdown-links-check.yml b/.github/workflows/markdown-links-check.yml
new file mode 100644
index 000000000..833b99ec8
--- /dev/null
+++ b/.github/workflows/markdown-links-check.yml
@@ -0,0 +1,36 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# A workflow to check whether a PR introduces broken hyperlinks
+name: Check Markdown links
+
+on:
+ pull_request:
+ types: [opened, synchronize, reopened]
+
+jobs:
+ markdown-link-check:
+ runs-on: ubuntu-latest
+ steps:
+ - name: work around permission issue
+ run: git config --global --add safe.directory /github/workspace
+ - uses: actions/checkout@master
+ - uses: gaurav-nelson/github-action-markdown-link-check@v1
+ with:
+ max-depth: -1
+ use-verbose-mode: 'yes'
+ config-file: '.github/workflows/markdown-links-check/markdown-links-check-config.json'
+ base-branch: 'dev'
+
+
\ No newline at end of file
diff --git a/.github/workflows/markdown-links-check/markdown-links-check-config.json b/.github/workflows/markdown-links-check/markdown-links-check-config.json
new file mode 100644
index 000000000..ec4af8ca8
--- /dev/null
+++ b/.github/workflows/markdown-links-check/markdown-links-check-config.json
@@ -0,0 +1,17 @@
+{
+ "ignorePatterns": [
+ {
+ "pattern": "https://github.com/NVIDIA/spark-rapids-tools/issues/*"
+ },
+ {
+ "pattern": "http://localhost*"
+ },
+ {
+ "pattern": "https://www.nvidia.com/en-us/security/pgp-key"
+ }
+ ],
+ "timeout": "15s",
+ "retryOn429": true,
+ "retryCount":30,
+ "aliveStatusCodes": [200, 403]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index 42f142176..ac1a5dcfb 100644
--- a/README.md
+++ b/README.md
@@ -4,12 +4,15 @@ This repo provides the tools to use [RAPIDS Accelerator for Apache Spark](https:
## Catalog
-- [RAPIDS core tools](/core): Tools that help developers getting the most out of their Apache Spark applications
+- [RAPIDS core tools](./core): Tools that help developers get the most out of their Apache
+  Spark applications
without any code change:
- Report acceleration potential of RAPIDS Accelerator for Apache Spark on a set of Spark applications.
- Generate comprehensive profiling analysis for Apache Sparks executing on accelerated GPU instances. This information
can be used to further tune and optimize the application.
-- [spark-rapids-user-tools](/user_tools): A simple wrapper process around cloud service providers to run
- [RAPIDS core tools](/core) across multiple cloud platforms. In addition, the output educates the users on
+- [spark-rapids-user-tools](./user_tools): A simple wrapper process around cloud service
+  providers to run [RAPIDS core tools](./core) across multiple cloud platforms.
+  In addition, the output educates the users on
the cost savings and acceleration potential of RAPIDS Accelerator for Apache Spark and makes recommendations to tune
the application performance based on the cluster shape.
diff --git a/core/pom.xml b/core/pom.xml
index 0e2a28274..d7c04d332 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -23,7 +23,7 @@
rapids-4-spark-tools_2.12
RAPIDS Accelerator for Apache Spark tools
RAPIDS Accelerator for Apache Spark tools
- 23.10.1
+ 23.10.2-SNAPSHOT
jar
http://github.com/NVIDIA/spark-rapids-tools
diff --git a/core/src/main/resources/operatorsScore-databricks-aws.csv b/core/src/main/resources/operatorsScore-databricks-aws.csv
index 25296214c..394437c13 100644
--- a/core/src/main/resources/operatorsScore-databricks-aws.csv
+++ b/core/src/main/resources/operatorsScore-databricks-aws.csv
@@ -105,6 +105,7 @@ Expm1,2.45
First,2.45
Flatten,2.45
Floor,2.45
+FormatNumber,2.45
FromUTCTimestamp,2.45
FromUnixTime,2.45
GetArrayItem,2.45
@@ -175,6 +176,7 @@ Not,2.45
NthValue,2.45
OctetLength,2.45
Or,2.45
+Percentile,2.45
PercentRank,2.45
PivotFirst,2.45
Pmod,2.45
@@ -214,6 +216,7 @@ SortOrder,2.45
SparkPartitionID,2.45
SpecifiedWindowFrame,2.45
Sqrt,2.45
+Stack,2.45
StartsWith,2.45
StddevPop,2.45
StddevSamp,2.45
@@ -229,6 +232,7 @@ StringTranslate,2.45
StringTrim,2.45
StringTrimLeft,2.45
StringTrimRight,2.45
+StructsToJson,2.45
Substring,2.45
SubstringIndex,2.45
Subtract,2.45
diff --git a/core/src/main/resources/operatorsScore-databricks-azure.csv b/core/src/main/resources/operatorsScore-databricks-azure.csv
index 1e04e16c8..86daf247d 100644
--- a/core/src/main/resources/operatorsScore-databricks-azure.csv
+++ b/core/src/main/resources/operatorsScore-databricks-azure.csv
@@ -105,6 +105,7 @@ Expm1,2.73
First,2.73
Flatten,2.73
Floor,2.73
+FormatNumber,2.73
FromUTCTimestamp,2.73
FromUnixTime,2.73
GetArrayItem,2.73
@@ -175,6 +176,7 @@ Not,2.73
NthValue,2.73
OctetLength,2.73
Or,2.73
+Percentile,2.73
PercentRank,2.73
PivotFirst,2.73
Pmod,2.73
@@ -214,6 +216,7 @@ SortOrder,2.73
SparkPartitionID,2.73
SpecifiedWindowFrame,2.73
Sqrt,2.73
+Stack,2.73
StartsWith,2.73
StddevPop,2.73
StddevSamp,2.73
@@ -229,6 +232,7 @@ StringTranslate,2.73
StringTrim,2.73
StringTrimLeft,2.73
StringTrimRight,2.73
+StructsToJson,2.73
Substring,2.73
SubstringIndex,2.73
Subtract,2.73
diff --git a/core/src/main/resources/operatorsScore-dataproc-gke-l4.csv b/core/src/main/resources/operatorsScore-dataproc-gke-l4.csv
index 1426aa047..e1d3678d4 100644
--- a/core/src/main/resources/operatorsScore-dataproc-gke-l4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-gke-l4.csv
@@ -98,6 +98,7 @@ Expm1,3.74
First,3.74
Flatten,3.74
Floor,3.74
+FormatNumber,3.74
FromUTCTimestamp,3.74
FromUnixTime,3.74
GetArrayItem,3.74
@@ -168,6 +169,7 @@ Not,3.74
NthValue,3.74
OctetLength,3.74
Or,3.74
+Percentile,3.74
PercentRank,3.74
PivotFirst,3.74
Pmod,3.74
@@ -207,6 +209,7 @@ SortOrder,3.74
SparkPartitionID,3.74
SpecifiedWindowFrame,3.74
Sqrt,3.74
+Stack,3.74
StartsWith,3.74
StddevPop,3.74
StddevSamp,3.74
@@ -222,6 +225,7 @@ StringTranslate,3.74
StringTrim,3.74
StringTrimLeft,3.74
StringTrimRight,3.74
+StructsToJson,3.74
Substring,3.74
SubstringIndex,3.74
Subtract,3.74
diff --git a/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv b/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv
index 3083cbe8b..2777068b7 100644
--- a/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv
@@ -98,6 +98,7 @@ Expm1,3.65
First,3.65
Flatten,3.65
Floor,3.65
+FormatNumber,3.65
FromUTCTimestamp,3.65
FromUnixTime,3.65
GetArrayItem,3.65
@@ -168,6 +169,7 @@ Not,3.65
NthValue,3.65
OctetLength,3.65
Or,3.65
+Percentile,3.65
PercentRank,3.65
PivotFirst,3.65
Pmod,3.65
@@ -207,6 +209,7 @@ SortOrder,3.65
SparkPartitionID,3.65
SpecifiedWindowFrame,3.65
Sqrt,3.65
+Stack,3.65
StartsWith,3.65
StddevPop,3.65
StddevSamp,3.65
@@ -222,6 +225,7 @@ StringTranslate,3.65
StringTrim,3.65
StringTrimLeft,3.65
StringTrimRight,3.65
+StructsToJson,3.65
Substring,3.65
SubstringIndex,3.65
Subtract,3.65
diff --git a/core/src/main/resources/operatorsScore-dataproc-l4.csv b/core/src/main/resources/operatorsScore-dataproc-l4.csv
index e61959ea6..ad371bb8f 100644
--- a/core/src/main/resources/operatorsScore-dataproc-l4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-l4.csv
@@ -105,6 +105,7 @@ Expm1,4.16
First,4.16
Flatten,4.16
Floor,4.16
+FormatNumber,4.16
FromUTCTimestamp,4.16
FromUnixTime,4.16
GetArrayItem,4.16
@@ -175,6 +176,7 @@ Not,4.16
NthValue,4.16
OctetLength,4.16
Or,4.16
+Percentile,4.16
PercentRank,4.16
PivotFirst,4.16
Pmod,4.16
@@ -214,6 +216,7 @@ SortOrder,4.16
SparkPartitionID,4.16
SpecifiedWindowFrame,4.16
Sqrt,4.16
+Stack,4.16
StartsWith,4.16
StddevPop,4.16
StddevSamp,4.16
@@ -229,6 +232,7 @@ StringTranslate,4.16
StringTrim,4.16
StringTrimLeft,4.16
StringTrimRight,4.16
+StructsToJson,4.16
Substring,4.16
SubstringIndex,4.16
Subtract,4.16
diff --git a/core/src/main/resources/operatorsScore-dataproc-serverless-l4.csv b/core/src/main/resources/operatorsScore-dataproc-serverless-l4.csv
index 493f1154c..fbe0d057e 100644
--- a/core/src/main/resources/operatorsScore-dataproc-serverless-l4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-serverless-l4.csv
@@ -98,6 +98,7 @@ Expm1,4.25
First,4.25
Flatten,4.25
Floor,4.25
+FormatNumber,4.25
FromUTCTimestamp,4.25
FromUnixTime,4.25
GetArrayItem,4.25
@@ -168,6 +169,7 @@ Not,4.25
NthValue,4.25
OctetLength,4.25
Or,4.25
+Percentile,4.25
PercentRank,4.25
PivotFirst,4.25
Pmod,4.25
@@ -207,6 +209,7 @@ SortOrder,4.25
SparkPartitionID,4.25
SpecifiedWindowFrame,4.25
Sqrt,4.25
+Stack,4.25
StartsWith,4.25
StddevPop,4.25
StddevSamp,4.25
@@ -222,6 +225,7 @@ StringTranslate,4.25
StringTrim,4.25
StringTrimLeft,4.25
StringTrimRight,4.25
+StructsToJson,4.25
Substring,4.25
SubstringIndex,4.25
Subtract,4.25
diff --git a/core/src/main/resources/operatorsScore-dataproc-t4.csv b/core/src/main/resources/operatorsScore-dataproc-t4.csv
index 3be159955..c6f3a7654 100644
--- a/core/src/main/resources/operatorsScore-dataproc-t4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-t4.csv
@@ -105,6 +105,7 @@ Expm1,4.88
First,4.88
Flatten,4.88
Floor,4.88
+FormatNumber,4.88
FromUTCTimestamp,4.88
FromUnixTime,4.88
GetArrayItem,4.88
@@ -175,6 +176,7 @@ Not,4.88
NthValue,4.88
OctetLength,4.88
Or,4.88
+Percentile,4.88
PercentRank,4.88
PivotFirst,4.88
Pmod,4.88
@@ -214,6 +216,7 @@ SortOrder,4.88
SparkPartitionID,4.88
SpecifiedWindowFrame,4.88
Sqrt,4.88
+Stack,4.88
StartsWith,4.88
StddevPop,4.88
StddevSamp,4.88
@@ -229,6 +232,7 @@ StringTranslate,4.88
StringTrim,4.88
StringTrimLeft,4.88
StringTrimRight,4.88
+StructsToJson,4.88
Substring,4.88
SubstringIndex,4.88
Subtract,4.88
diff --git a/core/src/main/resources/operatorsScore-emr-a10.csv b/core/src/main/resources/operatorsScore-emr-a10.csv
index b7557b7ef..91a2a840d 100644
--- a/core/src/main/resources/operatorsScore-emr-a10.csv
+++ b/core/src/main/resources/operatorsScore-emr-a10.csv
@@ -105,6 +105,7 @@ Expm1,2.59
First,2.59
Flatten,2.59
Floor,2.59
+FormatNumber,2.59
FromUTCTimestamp,2.59
FromUnixTime,2.59
GetArrayItem,2.59
@@ -175,6 +176,7 @@ Not,2.59
NthValue,2.59
OctetLength,2.59
Or,2.59
+Percentile,2.59
PercentRank,2.59
PivotFirst,2.59
Pmod,2.59
@@ -214,6 +216,7 @@ SortOrder,2.59
SparkPartitionID,2.59
SpecifiedWindowFrame,2.59
Sqrt,2.59
+Stack,2.59
StartsWith,2.59
StddevPop,2.59
StddevSamp,2.59
@@ -229,6 +232,7 @@ StringTranslate,2.59
StringTrim,2.59
StringTrimLeft,2.59
StringTrimRight,2.59
+StructsToJson,2.59
Substring,2.59
SubstringIndex,2.59
Subtract,2.59
diff --git a/core/src/main/resources/operatorsScore-emr-t4.csv b/core/src/main/resources/operatorsScore-emr-t4.csv
index 05f293080..8d34a914c 100644
--- a/core/src/main/resources/operatorsScore-emr-t4.csv
+++ b/core/src/main/resources/operatorsScore-emr-t4.csv
@@ -105,6 +105,7 @@ Expm1,2.07
First,2.07
Flatten,2.07
Floor,2.07
+FormatNumber,2.07
FromUTCTimestamp,2.07
FromUnixTime,2.07
GetArrayItem,2.07
@@ -175,6 +176,7 @@ Not,2.07
NthValue,2.07
OctetLength,2.07
Or,2.07
+Percentile,2.07
PercentRank,2.07
PivotFirst,2.07
Pmod,2.07
@@ -214,6 +216,7 @@ SortOrder,2.07
SparkPartitionID,2.07
SpecifiedWindowFrame,2.07
Sqrt,2.07
+Stack,2.07
StartsWith,2.07
StddevPop,2.07
StddevSamp,2.07
@@ -229,6 +232,7 @@ StringTranslate,2.07
StringTrim,2.07
StringTrimLeft,2.07
StringTrimRight,2.07
+StructsToJson,2.07
Substring,2.07
SubstringIndex,2.07
Subtract,2.07
diff --git a/core/src/main/resources/operatorsScore.csv b/core/src/main/resources/operatorsScore-onprem.csv
similarity index 98%
rename from core/src/main/resources/operatorsScore.csv
rename to core/src/main/resources/operatorsScore-onprem.csv
index 3903479a6..50ec61028 100644
--- a/core/src/main/resources/operatorsScore.csv
+++ b/core/src/main/resources/operatorsScore-onprem.csv
@@ -110,6 +110,7 @@ Expm1,4
First,4
Flatten,4
Floor,4
+FormatNumber,4
FromUTCTimestamp,4
FromUnixTime,4
GetArrayItem,4
@@ -180,6 +181,7 @@ Not,4
NthValue,4
OctetLength,4
Or,4
+Percentile,4
PercentRank,4
PivotFirst,4
Pmod,4
@@ -219,6 +221,7 @@ SortOrder,4
SparkPartitionID,4
SpecifiedWindowFrame,4
Sqrt,4
+Stack,4
StartsWith,4
StddevPop,4
StddevSamp,4
@@ -234,6 +237,7 @@ StringTranslate,4
StringTrim,4
StringTrimLeft,4
StringTrimRight,4
+StructsToJson,4
Substring,4
SubstringIndex,4
Subtract,4
diff --git a/core/src/main/resources/supportedExecs.csv b/core/src/main/resources/supportedExecs.csv
index 130b0657a..f5a3fe7c4 100644
--- a/core/src/main/resources/supportedExecs.csv
+++ b/core/src/main/resources/supportedExecs.csv
@@ -19,7 +19,7 @@ HashAggregateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,PS,NS,PS,PS,PS,NS
ObjectHashAggregateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,PS,NS,PS,PS,PS,NS
SortAggregateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,PS,NS,PS,PS,PS,NS
InMemoryTableScanExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,PS,PS,PS,NS
-DataWritingCommandExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,PS,NS,PS,NS,PS,PS,PS,NS
+DataWritingCommandExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,PS,NS,S,NS,PS,PS,PS,NS
ExecutedCommandExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S
BatchScanExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,PS,PS,PS,NS
BroadcastExchangeExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
diff --git a/core/src/main/resources/supportedExprs.csv b/core/src/main/resources/supportedExprs.csv
index 40b15c5ee..f6d4ee1fc 100644
--- a/core/src/main/resources/supportedExprs.csv
+++ b/core/src/main/resources/supportedExprs.csv
@@ -19,8 +19,8 @@ Add,S,`+`,None,AST,rhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA
Add,S,`+`,None,AST,result,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA
Alias,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
Alias,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
-Alias,S, ,None,AST,input,S,S,S,S,S,S,S,S,PS,NS,NS,NS,NS,NS,NS,NS,NS,NS
-Alias,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,NS,NS,NS,NS,NS,NS,NS,NS,NS
+Alias,S, ,None,AST,input,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS
+Alias,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS
And,S,`and`,None,project,lhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
And,S,`and`,None,project,rhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
And,S,`and`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
@@ -79,7 +79,7 @@ Atanh,S,`atanh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,
Atanh,S,`atanh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Atanh,S,`atanh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
AttributeReference,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
-AttributeReference,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,NS,NS,NS,NS,NS,NS,NS,NS,NS
+AttributeReference,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS
BRound,S,`bround`,None,project,value,NA,S,S,S,S,PS,PS,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA
BRound,S,`bround`,None,project,scale,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
BRound,S,`bround`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA
@@ -191,8 +191,8 @@ EqualNullSafe,S,`<=>`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
EqualTo,S,`=`; `==`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS
EqualTo,S,`=`; `==`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS
EqualTo,S,`=`; `==`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
-EqualTo,S,`=`; `==`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,NS,NS,NS,NS,NS,NS,NA,NS,NS
-EqualTo,S,`=`; `==`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,NS,NS,NS,NS,NS,NS,NA,NS,NS
+EqualTo,S,`=`; `==`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS
+EqualTo,S,`=`; `==`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS
EqualTo,S,`=`; `==`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Exp,S,`exp`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Exp,S,`exp`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
@@ -208,6 +208,9 @@ Flatten,S,`flatten`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Flatten,S,`flatten`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA
Floor,S,`floor`,None,project,input,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA
Floor,S,`floor`,None,project,result,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA
+FormatNumber,S,`format_number`,None,project,x,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA
+FormatNumber,S,`format_number`,None,project,d,NA,NA,NA,PS,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA
+FormatNumber,S,`format_number`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
FromUTCTimestamp,S,`from_utc_timestamp`,None,project,timestamp,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA
FromUTCTimestamp,S,`from_utc_timestamp`,None,project,timezone,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA
FromUTCTimestamp,S,`from_utc_timestamp`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA
@@ -233,14 +236,14 @@ GetTimestamp,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,N
GreaterThan,S,`>`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS
GreaterThan,S,`>`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS
GreaterThan,S,`>`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
-GreaterThan,S,`>`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,NS,NS,NS,NS,NS,NS,NA,NS,NS
-GreaterThan,S,`>`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,NS,NS,NS,NS,NS,NS,NA,NS,NS
+GreaterThan,S,`>`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS
+GreaterThan,S,`>`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS
GreaterThan,S,`>`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
GreaterThanOrEqual,S,`>=`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS
GreaterThanOrEqual,S,`>=`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS
GreaterThanOrEqual,S,`>=`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
-GreaterThanOrEqual,S,`>=`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,NS,NS,NS,NS,NS,NS,NA,NS,NS
-GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,NS,NS,NS,NS,NS,NS,NA,NS,NS
+GreaterThanOrEqual,S,`>=`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS
+GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS
GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS
Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS
@@ -273,7 +276,7 @@ IsNotNull,S,`isnotnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N
IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
JsonToStructs,NS,`from_json`,This is disabled by default because parsing JSON from a column has a large number of issues and should be considered beta quality right now.,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
-JsonToStructs,NS,`from_json`,This is disabled by default because parsing JSON from a column has a large number of issues and should be considered beta quality right now.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,NS,NA
+JsonToStructs,NS,`from_json`,This is disabled by default because parsing JSON from a column has a large number of issues and should be considered beta quality right now.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA
JsonTuple,S,`json_tuple`,None,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
JsonTuple,S,`json_tuple`,None,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA
JsonTuple,S,`json_tuple`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
@@ -301,20 +304,20 @@ Length,S,`length`; `character_length`; `char_length`,None,project,result,NA,NA,N
LessThan,S,`<`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS
LessThan,S,`<`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS
LessThan,S,`<`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
-LessThan,S,`<`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,NS,NS,NS,NS,NS,NS,NA,NS,NS
-LessThan,S,`<`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,NS,NS,NS,NS,NS,NS,NA,NS,NS
+LessThan,S,`<`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS
+LessThan,S,`<`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS
LessThan,S,`<`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
LessThanOrEqual,S,`<=`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS
LessThanOrEqual,S,`<=`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS
LessThanOrEqual,S,`<=`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
-LessThanOrEqual,S,`<=`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,NS,NS,NS,NS,NS,NS,NA,NS,NS
-LessThanOrEqual,S,`<=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,NS,NS,NS,NS,NS,NS,NA,NS,NS
+LessThanOrEqual,S,`<=`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS
+LessThanOrEqual,S,`<=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS
LessThanOrEqual,S,`<=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Like,S,`like`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
Like,S,`like`,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA
Like,S,`like`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Literal,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS
-Literal,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,NS,NS,NS,NS,NS,NS,NS,NS,NS
+Literal,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS
Log,S,`ln`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Log,S,`ln`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Log10,S,`log10`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
@@ -487,6 +490,9 @@ Sqrt,S,`sqrt`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,
Sqrt,S,`sqrt`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Sqrt,S,`sqrt`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Sqrt,S,`sqrt`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+Stack,S,`stack`,None,project,n,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+Stack,S,`stack`,None,project,expr,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS
+Stack,S,`stack`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA
StartsWith,S, ,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
StartsWith,S, ,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA
StartsWith,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
@@ -533,6 +539,8 @@ StringTrimLeft,S,`ltrim`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,
StringTrimRight,S,`rtrim`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
StringTrimRight,S,`rtrim`,None,project,trimStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA
StringTrimRight,S,`rtrim`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
+StructsToJson,NS,`to_json`,This is disabled by default because to_json support is experimental. See compatibility guide for more information.,project,struct,S,S,S,S,S,S,S,NA,NA,S,NA,NA,NA,NA,S,S,S,NA
+StructsToJson,NS,`to_json`,This is disabled by default because to_json support is experimental. See compatibility guide for more information.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
Substring,S,`substr`; `substring`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA
Substring,S,`substr`; `substring`,None,project,pos,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Substring,S,`substr`; `substring`,None,project,len,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
@@ -665,6 +673,14 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS
Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS
+Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
+Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
+Percentile,S,`percentile`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
+Percentile,S,`percentile`,None,reduction,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+Percentile,S,`percentile`,None,reduction,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
+Percentile,S,`percentile`,None,reduction,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
+Percentile,S,`percentile`,None,reduction,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
PivotFirst,S, ,None,aggregation,pivotColumn,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS
PivotFirst,S, ,None,aggregation,valueColumn,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS
PivotFirst,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,NS,NS
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/Platform.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/Platform.scala
new file mode 100644
index 000000000..67a36e8b9
--- /dev/null
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/Platform.scala
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.nvidia.spark.rapids.tool
+
+import scala.annotation.tailrec
+
+import org.apache.spark.internal.Logging
+
+/**
+ * Utility object containing constants for various platform names.
+ */
+object PlatformNames {
+ val DATABRICKS_AWS = "databricks-aws"
+ val DATABRICKS_AZURE = "databricks-azure"
+ val DATAPROC = "dataproc"
+ val DATAPROC_GKE_L4 = "dataproc-gke-l4"
+ val DATAPROC_GKE_T4 = "dataproc-gke-t4"
+ val DATAPROC_L4 = "dataproc-l4"
+ val DATAPROC_SL_L4 = "dataproc-serverless-l4"
+ val DATAPROC_T4 = "dataproc-t4"
+ val EMR = "emr"
+ val EMR_A10 = "emr-a10"
+ val EMR_T4 = "emr-t4"
+ val ONPREM = "onprem"
+ val DEFAULT: String = ONPREM
+
+ /**
+ * Return a list of all platform names.
+ */
+ def getAllNames: List[String] = List(
+ DATABRICKS_AWS, DATABRICKS_AZURE, DATAPROC, DATAPROC_GKE_L4, DATAPROC_GKE_T4,
+ DATAPROC_L4, DATAPROC_SL_L4, DATAPROC_T4, EMR, EMR_A10, EMR_T4, ONPREM
+ )
+}
+
+/**
+ * Represents a platform and its associated recommendations.
+ *
+ * @param platformName Name of the platform. See [[PlatformNames]] for supported platform names.
+ */
+class Platform(platformName: String) {
+ /**
+ * Recommendations to be excluded from the list of recommendations.
+ * These have the highest priority.
+ */
+ val recommendationsToExclude: Seq[String] = Seq.empty
+ /**
+ * Recommendations to be included in the final list of recommendations.
+ * These properties should be specific to the platform and not general Spark properties.
+ * For example: "spark.databricks.optimizer.dynamicFilePruning" for the Databricks platform.
+ *
+ * Represented as a tuple of (propertyKey, propertyValue).
+ */
+ val recommendationsToInclude: Seq[(String, String)] = Seq.empty
+ /**
+ * Dynamically calculates the recommendation for a specific Spark property by invoking
+ * the appropriate function based on `sparkProperty`.
+ * TODO: Implement this function and integrate with existing code in AutoTuner
+ *
+ * @param sparkProperty The Spark property for which the recommendation is calculated.
+ * @param args Variable list of arguments passed to the calculation function for dynamic
+ * processing.
+ * @return Optional string containing the recommendation, or `None` if unavailable.
+ */
+ def getRecommendation(sparkProperty: String, args: Any*): Option[String] = None
+
+ /**
+ * Checks whether `property` is a valid recommendation:
+ * 1. It should not be in the exclusion list,
+ *    OR
+ * 2. It should be in the inclusion list.
+ */
+ def isValidRecommendation(property: String): Boolean = {
+ !recommendationsToExclude.contains(property) ||
+ recommendationsToInclude.map(_._1).contains(property)
+ }
+
+ /**
+ * Checks if the `comment` is valid:
+ * 1. It should not mention any property from the exclusion list.
+ */
+ def isValidComment(comment: String): Boolean = {
+ recommendationsToExclude.forall(excluded => !comment.contains(excluded))
+ }
+
+ def getName: String = platformName
+
+ def getOperatorScoreFile: String = {
+ s"operatorsScore-$platformName.csv"
+ }
+}
+
+class DatabricksPlatform(platformType: String) extends Platform(platformType) {
+ override val recommendationsToExclude: Seq[String] = Seq(
+ "spark.executor.cores",
+ "spark.executor.instances",
+ "spark.executor.memory",
+ "spark.executor.memoryOverhead"
+ )
+ override val recommendationsToInclude: Seq[(String, String)] = Seq(
+ ("spark.databricks.optimizer.dynamicFilePruning", "false")
+ )
+}
+
+class DataprocPlatform(platformType: String) extends Platform(platformType)
+
+class EmrPlatform(platformType: String) extends Platform(platformType)
+
+class OnPremPlatform extends Platform(PlatformNames.ONPREM)
+
+/**
+ * Factory for creating instances of different platforms.
+ * This factory supports various platforms and provides methods for creating
+ * corresponding platform instances.
+ */
+object PlatformFactory extends Logging {
+ /**
+ * Creates an instance of a platform based on the specified platform key.
+ * If no platform key is specified, returns an instance of the default (onprem) platform.
+ *
+ * @param platformKey The key representing the desired platform.
+ * @return An instance of the specified platform.
+ * @throws IllegalArgumentException if the specified platform key is not supported.
+ */
+ @tailrec
+ def createInstance(platformKey: String = PlatformNames.DEFAULT): Platform = {
+ platformKey match {
+ case PlatformNames.DATABRICKS_AWS | PlatformNames.DATABRICKS_AZURE =>
+ new DatabricksPlatform(platformKey)
+ case PlatformNames.DATAPROC | PlatformNames.DATAPROC_T4 =>
+ // if no GPU specified, then default to dataproc-t4 for backward compatibility
+ new DataprocPlatform(PlatformNames.DATAPROC_T4)
+ case PlatformNames.DATAPROC_L4 | PlatformNames.DATAPROC_SL_L4 |
+ PlatformNames.DATAPROC_GKE_L4 | PlatformNames.DATAPROC_GKE_T4 =>
+ new DataprocPlatform(platformKey)
+ case PlatformNames.EMR | PlatformNames.EMR_T4 =>
+ // if no GPU specified, then default to emr-t4 for backward compatibility
+ new EmrPlatform(PlatformNames.EMR_T4)
+ case PlatformNames.EMR_A10 => new EmrPlatform(PlatformNames.EMR_A10)
+ case PlatformNames.ONPREM => new OnPremPlatform
+ case p if p.isEmpty =>
+ logInfo(s"Platform is not specified. Using ${PlatformNames.DEFAULT} " +
+ "as default.")
+ PlatformFactory.createInstance(PlatformNames.DEFAULT)
+ case _ => throw new IllegalArgumentException(s"Unsupported platform: $platformKey. " +
+ s"Options include ${PlatformNames.getAllNames.mkString(", ")}.")
+ }
+ }
+}
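
A minimal sketch (not part of the patch) of how the factory above is expected to be used; it only touches members defined in this new file, and the printed values follow directly from the code:

```scala
import com.nvidia.spark.rapids.tool.{PlatformFactory, PlatformNames}

object PlatformFactorySketch {
  def main(args: Array[String]): Unit = {
    // "dataproc" resolves to the dataproc-t4 profile for backward compatibility.
    val dataproc = PlatformFactory.createInstance(PlatformNames.DATAPROC)
    println(dataproc.getName) // dataproc-t4

    // Databricks platforms exclude executor sizing properties from recommendations.
    val databricks = PlatformFactory.createInstance(PlatformNames.DATABRICKS_AWS)
    println(databricks.isValidRecommendation("spark.executor.cores"))              // false
    println(databricks.isValidComment("set spark.executor.memoryOverhead higher")) // false

    // An empty key falls back to the onprem default; an unknown key throws.
    println(PlatformFactory.createInstance("").getName) // onprem
  }
}
```

Keeping the include/exclude lists on the Platform subclasses means the AutoTuner below no longer needs its own string-based platform match.
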
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala
index 96829df86..b809d9521 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala
@@ -39,7 +39,9 @@ class ExecInfo(
val children: Option[Seq[ExecInfo]], // only one level deep
val stages: Set[Int] = Set.empty,
val shouldRemove: Boolean = false,
- val unsupportedExprs: Array[String] = Array.empty) {
+ val unsupportedExprs: Array[String] = Array.empty,
+ val dataSet: Boolean = false,
+ val udf: Boolean = false) {
private def childrenToString = {
val str = children.map { c =>
c.map(" " + _.toString).mkString("\n")
@@ -76,7 +78,8 @@ object SQLPlanParser extends Logging {
val windowFunctionPattern = """(\w+)\(""".r
- val ignoreExpressions = Array("any", "cast", "decimal", "decimaltype", "every", "some",
+ val ignoreExpressions = Array("any", "cast", "ansi_cast", "decimal", "decimaltype", "every",
+ "some", "merge_max", "merge_min", "merge_sum", "merge_count", "merge_avg", "merge_first",
"list",
// current_database does not cause any CPU fallbacks
"current_database",
@@ -301,13 +304,12 @@ object SQLPlanParser extends Logging {
}
val stagesInNode = getStagesInSQLNode(node, app)
val supported = execInfos.isSupported && !ds && !containsUDF
-
// shouldRemove is set to true if the exec is a member of "execsToBeRemoved" or if the node
// is a duplicate
val removeFlag = execInfos.shouldRemove || isDupNode || execsToBeRemoved.contains(node.name)
Seq(new ExecInfo(execInfos.sqlID, execInfos.exec, execInfos.expr, execInfos.speedupFactor,
execInfos.duration, execInfos.nodeId, supported, execInfos.children,
- stagesInNode, removeFlag, execInfos.unsupportedExprs))
+ stagesInNode, removeFlag, execInfos.unsupportedExprs, ds, containsUDF))
}
}
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala
index 9b78a5b41..5419df9c6 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala
@@ -25,6 +25,7 @@ import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.util.matching.Regex
+import com.nvidia.spark.rapids.tool.{Platform, PlatformFactory}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, Path}
import org.yaml.snakeyaml.{DumperOptions, LoaderOptions, Yaml}
@@ -329,7 +330,8 @@ class RecommendationEntry(val name: String,
class AutoTuner(
val clusterProps: ClusterProperties,
val appInfoProvider: AppSummaryInfoBaseProvider,
- val platform: String) extends Logging {
+ val platform: Platform,
+ unsupportedOperators: Seq[DriverLogUnsupportedOperators]) extends Logging {
import AutoTuner._
@@ -343,23 +345,15 @@ class AutoTuner(
private val limitedLogicRecommendations: mutable.HashSet[String] = mutable.HashSet[String]()
// When enabled, the profiler recommendations should only include updated settings.
private var filterByUpdatedPropertiesEnabled: Boolean = true
- val selectedPlatform: Platform = platform match {
- case "databricks" => new DatabricksPlatform()
- case "dataproc" => new DataprocPlatform()
- case "emr" => new EmrPlatform()
- case "onprem" => new OnPremPlatform()
- }
private def isCalculationEnabled(prop: String) : Boolean = {
!limitedLogicRecommendations.contains(prop)
}
def getPropertyValue(key: String): Option[String] = {
- val fromProfile = appInfoProvider.getProperty(key)
- fromProfile match {
- case None => Option(clusterProps.softwareProperties.get(key))
- case Some(_) => fromProfile
- }
+ val fromProfile = Option(appInfoProvider).flatMap(_.getProperty(key))
+ // If the value is not found above, fallback to cluster properties
+ fromProfile.orElse(Option(clusterProps.softwareProperties.get(key)))
}
def initRecommendations(): Unit = {
@@ -824,6 +818,20 @@ class AutoTuner(
appendRecommendation("spark.sql.shuffle.partitions", s"$shufflePartitions")
}
+ /**
+ * Analyzes unsupported operators from the driver log and recommends related config properties.
+ */
+ private def recommendFromDriverLogs(): Unit = {
+ // Iterate through unsupported operators' reasons and check for matching properties
+ unsupportedOperators.map(_.reason).foreach { operatorReason =>
+ recommendationsFromDriverLogs.collect {
+ case (config, recommendedValue) if operatorReason.contains(config) =>
+ appendRecommendation(config, recommendedValue)
+ appendComment(commentForExperimentalConfig(config))
+ }
+ }
+ }
+
def appendOptionalComment(lookup: String, comment: String): Unit = {
if (!skippedRecommendations.contains(lookup)) {
appendComment(comment)
@@ -912,7 +920,7 @@ class AutoTuner(
limitedSeq.foreach(_ => limitedLogicRecommendations.add(_))
}
skipList.foreach(skipSeq => skipSeq.foreach(_ => skippedRecommendations.add(_)))
- skippedRecommendations ++= selectedPlatform.recommendationsToExclude
+ skippedRecommendations ++= platform.recommendationsToExclude
initRecommendations()
calculateJobLevelRecommendations()
if (processPropsAndCheck) {
@@ -922,10 +930,13 @@ class AutoTuner(
addDefaultComments()
}
// add all platform specific recommendations
- selectedPlatform.recommendationsToInclude.foreach {
+ platform.recommendationsToInclude.foreach {
case (property, value) => appendRecommendation(property, value)
}
}
+ if (unsupportedOperators.nonEmpty) {
+ recommendFromDriverLogs()
+ }
(toRecommendationsProfileResult, toCommentProfileResult)
}
}
@@ -975,6 +986,8 @@ object AutoTuner extends Logging {
val DEF_READ_SIZE_THRESHOLD = 100 * 1024L * 1024L * 1024L
val DEFAULT_WORKER_INFO_PATH = "./worker_info.yaml"
val SUPPORTED_SIZE_UNITS: Seq[String] = Seq("b", "k", "m", "g", "t", "p")
+ private val DOC_URL: String = "https://nvidia.github.io/spark-rapids/docs/" +
+ "additional-functionality/advanced_configs.html#advanced-configuration"
val commentsForMissingProps: Map[String, String] = Map(
"spark.executor.memory" ->
@@ -1022,15 +1035,27 @@ object AutoTuner extends Logging {
" If the Spark RAPIDS jar is being bundled with your Spark\n" +
" distribution, this step is not needed.")
)
+
+ // Recommended values for specific unsupported configurations
+ private val recommendationsFromDriverLogs: Map[String, String] = Map(
+ "spark.rapids.sql.incompatibleDateFormats.enabled" -> "true"
+ )
+
+ def commentForExperimentalConfig(config: String): String = {
+ s"Using $config does not guarantee to produce the same results as CPU. " +
+ s"Please refer to $DOC_URL."
+ }
+
// the plugin jar is in the form of rapids-4-spark_scala_binary-(version)-*.jar
val pluginJarRegEx: Regex = "rapids-4-spark_\\d\\.\\d+-(\\d{2}\\.\\d{2}\\.\\d+).*\\.jar".r
private def handleException(
ex: Exception,
appInfo: AppSummaryInfoBaseProvider,
- platform: String): AutoTuner = {
+ platform: Platform,
+ unsupportedOperators: Seq[DriverLogUnsupportedOperators]): AutoTuner = {
logError("Exception: " + ex.getStackTrace.mkString("Array(", ", ", ")"))
- val tuning = new AutoTuner(new ClusterProperties(), appInfo, platform)
+ val tuning = new AutoTuner(new ClusterProperties(), appInfo, platform, unsupportedOperators)
val msg = ex match {
case cEx: ConstructorException => cEx.getContext
case _ => if (ex.getCause != null) ex.getCause.toString else ex.toString
@@ -1080,26 +1105,30 @@ object AutoTuner extends Logging {
def buildAutoTunerFromProps(
clusterProps: String,
singleAppProvider: AppSummaryInfoBaseProvider,
- platform: String = Profiler.DEFAULT_PLATFORM): AutoTuner = {
+ platform: Platform = PlatformFactory.createInstance(),
+ unsupportedOperators: Seq[DriverLogUnsupportedOperators] = Seq.empty): AutoTuner = {
try {
val clusterPropsOpt = loadClusterPropertiesFromContent(clusterProps)
- new AutoTuner(clusterPropsOpt.getOrElse(new ClusterProperties()), singleAppProvider, platform)
+ new AutoTuner(clusterPropsOpt.getOrElse(new ClusterProperties()), singleAppProvider, platform,
+ unsupportedOperators)
} catch {
case e: Exception =>
- handleException(e, singleAppProvider, platform)
+ handleException(e, singleAppProvider, platform, unsupportedOperators)
}
}
def buildAutoTuner(
filePath: String,
singleAppProvider: AppSummaryInfoBaseProvider,
- platform: String = Profiler.DEFAULT_PLATFORM): AutoTuner = {
+ platform: Platform = PlatformFactory.createInstance(),
+ unsupportedOperators: Seq[DriverLogUnsupportedOperators] = Seq.empty): AutoTuner = {
try {
val clusterPropsOpt = loadClusterProps(filePath)
- new AutoTuner(clusterPropsOpt.getOrElse(new ClusterProperties()), singleAppProvider, platform)
+ new AutoTuner(clusterPropsOpt.getOrElse(new ClusterProperties()), singleAppProvider, platform,
+ unsupportedOperators)
} catch {
case e: Exception =>
- handleException(e, singleAppProvider, platform)
+ handleException(e, singleAppProvider, platform, unsupportedOperators)
}
}
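
The new recommendFromDriverLogs pass substring-matches each unsupported operator's reason against the keys of recommendationsFromDriverLogs. A standalone sketch of that matching logic follows; the reason string is made up purely for illustration:

```scala
object DriverLogRecommendationSketch {
  // Mirrors the private map added to AutoTuner above.
  private val recommendationsFromDriverLogs: Map[String, String] = Map(
    "spark.rapids.sql.incompatibleDateFormats.enabled" -> "true"
  )

  // Returns (config, recommendedValue) pairs whose config key appears in a reason string.
  def recommend(reasons: Seq[String]): Seq[(String, String)] = {
    reasons.flatMap { reason =>
      recommendationsFromDriverLogs.collect {
        case (config, value) if reason.contains(config) => (config, value)
      }
    }.distinct
  }

  def main(args: Array[String]): Unit = {
    val reasons = Seq(
      "not supported; set spark.rapids.sql.incompatibleDateFormats.enabled to true to enable")
    // Prints: List((spark.rapids.sql.incompatibleDateFormats.enabled,true))
    println(recommend(reasons))
  }
}
```
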
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Platform.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Platform.scala
deleted file mode 100644
index 46c6bc8e0..000000000
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Platform.scala
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.nvidia.spark.rapids.tool.profiling
-
-/**
- * Represents a platform and its associated recommendations.
- */
-class Platform {
- /**
- * Recommendations to be excluded from the list of recommendations.
- * These have the highest priority.
- */
- val recommendationsToExclude: Seq[String] = Seq.empty
- /**
- * Recommendations to be included in the final list of recommendations.
- * These properties should be specific to the platform and not general Spark properties.
- * For example: "spark.databricks.optimizer.dynamicFilePruning" for the Databricks platform.
- *
- * Represented as a tuple of (propertyKey, propertyValue).
- */
- val recommendationsToInclude: Seq[(String, String)] = Seq.empty
- /**
- * Dynamically calculates the recommendation for a specific Spark property by invoking
- * the appropriate function based on `sparkProperty`.
- * TODO: Implement this function and integrate with existing code in AutoTuner
- *
- * @param sparkProperty The Spark property for which the recommendation is calculated.
- * @param args Variable list of arguments passed to the calculation function for dynamic
- * processing.
- * @return Optional string containing the recommendation, or `None` if unavailable.
- */
- def getRecommendation(sparkProperty: String, args: Any*): Option[String] = None
-
- /**
- * Checks if the `property` is valid:
- * 1. It should not be in exclusion list
- * OR
- * 2. It should be in the inclusion list
- */
- def isValidRecommendation(property: String): Boolean = {
- !recommendationsToExclude.contains(property) ||
- recommendationsToInclude.map(_._1).contains(property)
- }
-
- /**
- * Checks if the `comment` is valid:
- * 1. It should not have any property from the exclusion list
- */
- def isValidComment(comment: String): Boolean = {
- recommendationsToExclude.forall(excluded => !comment.contains(excluded))
- }
-}
-
-class DatabricksPlatform extends Platform {
- override val recommendationsToExclude: Seq[String] = Seq(
- "spark.executor.cores",
- "spark.executor.instances",
- "spark.executor.memory",
- "spark.executor.memoryOverhead"
- )
- override val recommendationsToInclude: Seq[(String, String)] = Seq(
- ("spark.databricks.optimizer.dynamicFilePruning", "false")
- )
-}
-
-class DataprocPlatform extends Platform {}
-
-class EmrPlatform extends Platform {}
-
-class OnPremPlatform extends Platform {}
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileArgs.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileArgs.scala
index b1044a4ed..839e3789e 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileArgs.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileArgs.scala
@@ -15,6 +15,7 @@
*/
package com.nvidia.spark.rapids.tool.profiling
+import com.nvidia.spark.rapids.tool.PlatformNames
import org.rogach.scallop.{ScallopConf, ScallopOption}
import org.rogach.scallop.exceptions.ScallopException
@@ -70,9 +71,9 @@ Usage: java -cp rapids-4-spark-tools_2.12-.jar:$SPARK_HOME/jars/*
val platform: ScallopOption[String] =
opt[String](required = false,
descr = "Cluster platform where Spark GPU workloads were executed. Options include " +
- "onprem, dataproc, emr, databricks." +
- " Default is onprem.",
- default = Some(Profiler.DEFAULT_PLATFORM))
+ s"${PlatformNames.getAllNames.mkString(", ")}. " +
+ s"Default is ${PlatformNames.DEFAULT}.",
+ default = Some(PlatformNames.DEFAULT))
val generateTimeline: ScallopOption[Boolean] =
opt[Boolean](required = false,
descr = "Write an SVG graph out for the full application timeline.")
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileMain.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileMain.scala
index d839f76a8..71759a9ae 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileMain.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileMain.scala
@@ -77,10 +77,10 @@ object ProfileMain extends Logging {
}
val profiler = new Profiler(hadoopConf, appArgs, enablePB)
- profiler.profile(eventLogFsFiltered)
- if (driverLog.nonEmpty){
- profiler.profileDriver(driverLog)
+ if (driverLog.nonEmpty) {
+ profiler.profileDriver(driverLog, eventLogFsFiltered.isEmpty)
}
+ profiler.profile(eventLogFsFiltered)
(0, filteredLogs.size)
}
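
A hedged sketch of the reordered flow: profileDriver now runs before profile and is told whether any event logs were found, so a driver-log-only run can still produce AutoTuner recommendations. Parameter types are inferred from the signatures visible elsewhere in this patch, so treat it as illustrative rather than as the exact ProfileMain code:

```scala
import org.apache.hadoop.conf.Configuration

import com.nvidia.spark.rapids.tool.EventLogInfo
import com.nvidia.spark.rapids.tool.profiling.{ProfileArgs, Profiler}

object DriverLogFirstSketch {
  def runProfiling(
      hadoopConf: Configuration,
      appArgs: ProfileArgs,
      driverLog: String,
      eventLogs: Seq[EventLogInfo]): Unit = {
    val profiler = new Profiler(hadoopConf, appArgs, true)
    if (driverLog.nonEmpty) {
      // When no event logs were found, profileDriver also runs the AutoTuner against
      // the unsupported operators parsed from the driver log (see Profiler.scala below).
      profiler.profileDriver(driverLog, eventLogs.isEmpty)
    }
    profiler.profile(eventLogs)
  }
}
```
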
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
index 44528966b..a3fff6067 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
@@ -23,7 +23,7 @@ import scala.collection.mutable.{ArrayBuffer, HashMap}
import scala.util.control.NonFatal
import com.nvidia.spark.rapids.ThreadFactoryBuilder
-import com.nvidia.spark.rapids.tool.{EventLogInfo, EventLogPathProcessor}
+import com.nvidia.spark.rapids.tool.{EventLogInfo, EventLogPathProcessor, PlatformFactory}
import org.apache.hadoop.conf.Configuration
import org.apache.spark.internal.Logging
@@ -124,15 +124,21 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolea
progressBar.foreach(_.finishAll())
}
- def profileDriver(driverLogInfos: String): Unit = {
+ def profileDriver(driverLogInfos: String, eventLogsEmpty: Boolean): Unit = {
val profileOutputWriter = new ProfileOutputWriter(s"$outputDir/driver",
Profiler.DRIVER_LOG_NAME, numOutputRows, true)
-
try {
val driverLogProcessor = new DriverLogProcessor(driverLogInfos)
- val unsupportedDrivers = driverLogProcessor.processDriverLog()
+ val unsupportedDriverOperators = driverLogProcessor.processDriverLog()
profileOutputWriter.write(s"Unsupported operators in driver log",
- unsupportedDrivers)
+ unsupportedDriverOperators)
+ if (eventLogsEmpty && useAutoTuner) {
+        // With no event logs to process, the AutoTuner would not run as part of
+        // event log profiling, so run it here explicitly.
+ val (properties, comments) = runAutoTuner(None, unsupportedDriverOperators)
+ profileOutputWriter.writeText("\n### A. Recommended Configuration ###\n")
+ profileOutputWriter.writeText(Profiler.getAutoTunerResultsAsString(properties, comments))
+ }
} finally {
profileOutputWriter.close()
}
@@ -403,6 +409,26 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolea
appLogPath, ioAnalysisMetrics), compareRes)
}
+ /**
+ * A wrapper method to run the AutoTuner.
+ * @param appInfo Summary of the application for tuning.
+   * @param unsupportedDriverOperators List of unsupported operators found in the driver log.
+ */
+ private def runAutoTuner(appInfo: Option[ApplicationSummaryInfo],
+ unsupportedDriverOperators: Seq[DriverLogUnsupportedOperators])
+ : (Seq[RecommendedPropertyResult], Seq[RecommendedCommentResult]) = {
+ val appInfoProvider = appInfo.map(new SingleAppSummaryInfoProvider(_)).orNull
+ val workerInfoPath = appArgs.workerInfo.getOrElse(AutoTuner.DEFAULT_WORKER_INFO_PATH)
+ val platform = appArgs.platform()
+ val autoTuner: AutoTuner = AutoTuner.buildAutoTuner(workerInfoPath, appInfoProvider,
+ PlatformFactory.createInstance(platform), unsupportedDriverOperators)
+
+ // The autotuner allows skipping some properties,
+ // e.g., getRecommendedProperties(Some(Seq("spark.executor.instances"))) skips the
+ // recommendation related to executor instances.
+ autoTuner.getRecommendedProperties()
+ }
+
def writeOutput(profileOutputWriter: ProfileOutputWriter,
appsSum: Seq[ApplicationSummaryInfo], outputCombined: Boolean,
comparedRes: Option[CompareSummaryInfo] = None): Unit = {
@@ -464,7 +490,7 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolea
} else {
appsSum
}
- sums.foreach { app =>
+ sums.foreach { app: ApplicationSummaryInfo =>
profileOutputWriter.writeText("### A. Information Collected ###")
profileOutputWriter.write("Application Information", app.appInfo)
profileOutputWriter.write("Application Log Path Mapping", app.appLogPath)
@@ -510,14 +536,7 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolea
Some("Unsupported SQL Ops"))
if (useAutoTuner) {
- val workerInfoPath = appArgs.workerInfo.getOrElse(AutoTuner.DEFAULT_WORKER_INFO_PATH)
- val platform = appArgs.platform.getOrElse(Profiler.DEFAULT_PLATFORM)
- val autoTuner: AutoTuner = AutoTuner.buildAutoTuner(workerInfoPath,
- new SingleAppSummaryInfoProvider(app), platform)
- // the autotuner allows skipping some properties
- // e.g. getRecommendedProperties(Some(Seq("spark.executor.instances"))) skips the
- // recommendation related to executor instances.
- val (properties, comments) = autoTuner.getRecommendedProperties()
+ val (properties, comments) = runAutoTuner(Some(app), Seq.empty)
profileOutputWriter.writeText("\n### D. Recommended Configuration ###\n")
profileOutputWriter.writeText(Profiler.getAutoTunerResultsAsString(properties, comments))
}
@@ -548,7 +567,6 @@ object Profiler {
val COMPARE_LOG_FILE_NAME_PREFIX = "rapids_4_spark_tools_compare"
val COMBINED_LOG_FILE_NAME_PREFIX = "rapids_4_spark_tools_combined"
val SUBDIR = "rapids_4_spark_profile"
- val DEFAULT_PLATFORM = "onprem"
def getAutoTunerResultsAsString(props: Seq[RecommendedPropertyResult],
comments: Seq[RecommendedCommentResult]): String = {
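
For a driver-log-only run, the new runAutoTuner wrapper ends up calling AutoTuner.buildAutoTuner roughly as sketched below. Passing a null provider mirrors the orNull fallback above, and the empty operator list only keeps the sketch self-contained; both are assumptions about acceptable inputs rather than documented API guarantees:

```scala
import com.nvidia.spark.rapids.tool.PlatformFactory
import com.nvidia.spark.rapids.tool.profiling.AutoTuner

object DriverLogOnlyTuningSketch {
  def main(args: Array[String]): Unit = {
    val autoTuner = AutoTuner.buildAutoTuner(
      AutoTuner.DEFAULT_WORKER_INFO_PATH,
      singleAppProvider = null,
      platform = PlatformFactory.createInstance("emr"),
      unsupportedOperators = Seq.empty)
    // Same call used by runAutoTuner; skip lists are left at their defaults here.
    val (recommendations, comments) = autoTuner.getRecommendedProperties()
    recommendations.foreach(println)
    comments.foreach(println)
  }
}
```
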
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeChecker.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeChecker.scala
index 7baf5455e..0ff5bd614 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeChecker.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeChecker.scala
@@ -20,6 +20,7 @@ import scala.collection.mutable.{ArrayBuffer,HashMap}
import scala.io.{BufferedSource, Source}
import scala.util.control.NonFatal
+import com.nvidia.spark.rapids.tool.{Platform, PlatformFactory}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
@@ -32,7 +33,7 @@ import org.apache.spark.internal.Logging
* by the plugin which lists the formats and types supported.
* The class also supports a custom speedup factor file as input.
*/
-class PluginTypeChecker(platform: String = "onprem",
+class PluginTypeChecker(platform: Platform = PlatformFactory.createInstance(),
speedupFactorFile: Option[String] = None) extends Logging {
private val NS = "NS"
@@ -44,16 +45,6 @@ class PluginTypeChecker(platform: String = "onprem",
private val NA = "NA"
private val DEFAULT_DS_FILE = "supportedDataSource.csv"
- private val OPERATORS_SCORE_FILE_ONPREM = "operatorsScore.csv"
- private val OPERATORS_SCORE_FILE_DATAPROC_T4 = "operatorsScore-dataproc-t4.csv"
- private val OPERATORS_SCORE_FILE_DATAPROC_L4 = "operatorsScore-dataproc-l4.csv"
- private val OPERATORS_SCORE_FILE_DATAPROC_SL_L4 = "operatorsScore-dataproc-serverless-l4.csv"
- private val OPERATORS_SCORE_FILE_DATAPROC_GKE_T4 = "operatorsScore-dataproc-gke-t4.csv"
- private val OPERATORS_SCORE_FILE_DATAPROC_GKE_L4 = "operatorsScore-dataproc-gke-l4.csv"
- private val OPERATORS_SCORE_FILE_EMR_T4 = "operatorsScore-emr-t4.csv"
- private val OPERATORS_SCORE_FILE_EMR_A10 = "operatorsScore-emr-a10.csv"
- private val OPERATORS_SCORE_FILE_DATABRICKS_AWS = "operatorsScore-databricks-aws.csv"
- private val OPERATORS_SCORE_FILE_DATABRICKS_AZURE = "operatorsScore-databricks-azure.csv"
private val SUPPORTED_EXECS_FILE = "supportedExecs.csv"
private val SUPPORTED_EXPRS_FILE = "supportedExprs.csv"
@@ -101,20 +92,7 @@ class PluginTypeChecker(platform: String = "onprem",
speedupFactorFile match {
case None =>
logInfo(s"Reading operators scores with platform: $platform")
- val file = platform match {
- // if no GPU specified, then default to dataproc-t4 for backward compatibility
- case "dataproc-t4" | "dataproc" => OPERATORS_SCORE_FILE_DATAPROC_T4
- case "dataproc-l4" => OPERATORS_SCORE_FILE_DATAPROC_L4
- case "dataproc-serverless-l4" => OPERATORS_SCORE_FILE_DATAPROC_SL_L4
- case "dataproc-gke-t4" => OPERATORS_SCORE_FILE_DATAPROC_GKE_T4
- case "dataproc-gke-l4" => OPERATORS_SCORE_FILE_DATAPROC_GKE_L4
- // if no GPU specified, then default to emr-t4 for backward compatibility
- case "emr-t4" | "emr" => OPERATORS_SCORE_FILE_EMR_T4
- case "emr-a10" => OPERATORS_SCORE_FILE_EMR_A10
- case "databricks-aws" => OPERATORS_SCORE_FILE_DATABRICKS_AWS
- case "databricks-azure" => OPERATORS_SCORE_FILE_DATABRICKS_AZURE
- case _ => OPERATORS_SCORE_FILE_ONPREM
- }
+ val file = platform.getOperatorScoreFile
val source = Source.fromResource(file)
readSupportedOperators(source, "score").map(x => (x._1, x._2.toDouble))
case Some(file) =>
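
The hard-coded score-file lookup removed above is replaced by Platform.getOperatorScoreFile, which derives the resource name as operatorsScore-<platformName>.csv; that convention is also why operatorsScore.csv is renamed to operatorsScore-onprem.csv earlier in this patch. A small sketch of the resulting mapping:

```scala
import com.nvidia.spark.rapids.tool.{PlatformFactory, PlatformNames}

object OperatorScoreFileSketch {
  def main(args: Array[String]): Unit = {
    // Every supported platform name resolves to one bundled score file,
    // e.g. onprem -> operatorsScore-onprem.csv, emr -> operatorsScore-emr-t4.csv.
    PlatformNames.getAllNames.foreach { name =>
      val platform = PlatformFactory.createInstance(name)
      println(s"$name -> ${platform.getOperatorScoreFile}")
    }
  }
}
```
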
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualOutputWriter.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualOutputWriter.scala
index d4eff43fc..bd1b91654 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualOutputWriter.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualOutputWriter.scala
@@ -24,7 +24,7 @@ import com.nvidia.spark.rapids.tool.profiling.ProfileUtils.replaceDelimiter
import com.nvidia.spark.rapids.tool.qualification.QualOutputWriter.{CLUSTER_ID, CLUSTER_ID_STR_SIZE, JOB_ID, JOB_ID_STR_SIZE, RUN_NAME, RUN_NAME_STR_SIZE, TEXT_DELIMITER}
import org.apache.hadoop.conf.Configuration
-import org.apache.spark.sql.rapids.tool.ToolUtils
+import org.apache.spark.sql.rapids.tool.{IgnoreExecs, ToolUtils}
import org.apache.spark.sql.rapids.tool.qualification.{EstimatedPerSQLSummaryInfo, EstimatedSummaryInfo, QualificationAppInfo, QualificationSummaryInfo, StatusSummaryInfo}
import org.apache.spark.sql.rapids.tool.util._
@@ -151,13 +151,38 @@ class QualOutputWriter(outputDir: String, reportReadSchema: Boolean,
val csvFileWriter = new ToolTextFileWriter(outputDir,
s"${QualOutputWriter.LOGFILE_NAME}_unsupportedOperators.csv",
"Unsupported Operators CSV Report", hadoopConf)
- val headersAndSizes = QualOutputWriter.getUnsupportedOperatorsHeaderStringsAndSizes(sums)
- csvFileWriter.write(QualOutputWriter.constructOutputRowFromMap(headersAndSizes,
- QualOutputWriter.CSV_DELIMITER, false))
- sums.foreach { sum =>
- val rows = QualOutputWriter.constructUnsupportedOperatorsInfo(sum, headersAndSizes,
- QualOutputWriter.CSV_DELIMITER, false)
- rows.foreach(row => csvFileWriter.write(row))
+ try {
+ val headersAndSizes = QualOutputWriter.getUnsupportedOperatorsHeaderStringsAndSizes(sums)
+ csvFileWriter.write(QualOutputWriter.constructOutputRowFromMap(headersAndSizes,
+ QualOutputWriter.CSV_DELIMITER, false))
+ sums.foreach { sum =>
+ val rows = QualOutputWriter.constructUnsupportedOperatorsInfo(sum, headersAndSizes,
+ QualOutputWriter.CSV_DELIMITER, false)
+ rows.foreach(row => csvFileWriter.write(row))
+ }
+ } finally {
+ csvFileWriter.close()
+ }
+ }
+
+ def writeUnsupportedOperatorsDetailedStageCSVReport(
+ sums: Seq[QualificationSummaryInfo],
+ order: String): Unit = {
+ val csvFileWriter = new ToolTextFileWriter(outputDir,
+ s"${QualOutputWriter.LOGFILE_NAME}_unsupportedOperatorsStageDuration.csv",
+ "Unsupported Operators StageDuration CSV Report", hadoopConf)
+ try {
+ val headersAndSizes =
+ QualOutputWriter.getUnsupportedOperatorsStageDurationsHeaderStringsAndSizes(sums)
+ csvFileWriter.write(QualOutputWriter.constructOutputRowFromMap(headersAndSizes,
+ QualOutputWriter.CSV_DELIMITER, false))
+ sums.foreach { sum =>
+ val rows = QualOutputWriter.constructUnsupportedStagesDurationInfo(sum, headersAndSizes,
+ QualOutputWriter.CSV_DELIMITER, false)
+ rows.foreach(row => csvFileWriter.write(row))
+ }
+ } finally {
+ csvFileWriter.close()
}
}
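The try/finally wrappers added above guarantee that each report writer is closed even when constructing or writing a row throws. The same guarantee can be expressed with a small loan-pattern helper; the sketch below is illustrative only, uses a plain java.io.PrintWriter instead of the project's ToolTextFileWriter, and echoes the unsupported-operators header columns named later in this diff.

import java.io.PrintWriter

object WithResourceSketch {
  // Run `body`, then always close the resource, mirroring the try/finally blocks above.
  def withResource[A <: AutoCloseable, B](resource: A)(body: A => B): B =
    try body(resource) finally resource.close()

  def main(args: Array[String]): Unit = {
    withResource(new PrintWriter("example_unsupportedOperators.csv")) { pw =>
      pw.println("App ID,Unsupported Type,Details,Notes,Ignore Operator")
    }
  }
}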
@@ -361,6 +386,7 @@ object QualOutputWriter {
val SQL_DUR_STR = "SQL DF Duration"
val TASK_DUR_STR = "SQL Dataframe Task Duration"
val STAGE_DUR_STR = "Stage Task Duration"
+ val STAGE_WALLCLOCK_DUR_STR = "Stage Duration"
val POT_PROBLEM_STR = "Potential Problems"
val EXEC_CPU_PERCENT_STR = "Executor CPU Time Percent"
val APP_DUR_ESTIMATED_STR = "App Duration Estimated"
@@ -400,6 +426,7 @@ object QualOutputWriter {
val UNSUPPORTED_TYPE = "Unsupported Type"
val DETAILS = "Details"
val NOTES = "Notes"
+ val IGNORE_OPERATOR = "Ignore Operator"
val RUN_NAME = "RunName"
val ESTIMATED_FREQUENCY = "Estimated Job Frequency (monthly)"
val ML_FUNCTIONS = "ML Functions"
@@ -540,11 +567,25 @@ object QualOutputWriter {
APP_ID_STR -> QualOutputWriter.getAppIdSize(appInfos),
UNSUPPORTED_TYPE -> UNSUPPORTED_TYPE.size,
DETAILS -> DETAILS.size,
- NOTES -> NOTES.size
+ NOTES -> NOTES.size,
+ IGNORE_OPERATOR -> IGNORE_OPERATOR.size
)
detailedHeaderAndFields
}
+ def getUnsupportedOperatorsStageDurationsHeaderStringsAndSizes(
+ appInfos: Seq[QualificationSummaryInfo]): LinkedHashMap[String, Int] = {
+ val detailedHeaderAndFields = LinkedHashMap[String, Int](
+ APP_ID_STR -> QualOutputWriter.getAppIdSize(appInfos),
+ UNSUPPORTED_TYPE -> UNSUPPORTED_TYPE.size,
+ STAGE_ID_STR -> STAGE_ID_STR.size,
+ STAGE_WALLCLOCK_DUR_STR -> STAGE_WALLCLOCK_DUR_STR.size,
+ APP_DUR_STR -> APP_DUR_STR.size,
+ SPEEDUP_BUCKET_STR -> SPEEDUP_BUCKET_STR_SIZE,
+ IGNORE_OPERATOR -> IGNORE_OPERATOR.size
+ )
+ detailedHeaderAndFields
+ }
def getDetailedHeaderStringsAndSizes(appInfos: Seq[QualificationSummaryInfo],
reportReadSchema: Boolean): LinkedHashMap[String, Int] = {
@@ -886,7 +927,7 @@ object QualOutputWriter {
}
}
- def constructUnsupportedOperatorsInfo(
+ def constructUnsupportedStagesDurationInfo(
sumInfo: QualificationSummaryInfo,
headersAndSizes: LinkedHashMap[String, Int],
delimiter: String = TEXT_DELIMITER,
@@ -895,81 +936,130 @@ object QualOutputWriter {
val reformatCSVFunc: String => String =
if (reformatCSV) str => StringUtils.reformatCSVString(str) else str => stringIfempty(str)
val appId = sumInfo.appId
- val readFormat = sumInfo.readFileFormatAndTypesNotSupported
- val writeFormat = sumInfo.writeDataFormat
- val unsupportedExecs = sumInfo.unSupportedExecs
- val unsupportedExprs = sumInfo.unSupportedExprs
- val unsupportedExecExprsMap = sumInfo.unsupportedExecstoExprsMap
- val unsupportedOperatorsOutputRows = new ArrayBuffer[String]()
+ val appDuration = sumInfo.sparkSqlDFWallClockDuration
+ val recommendation = sumInfo.estimatedInfo.recommendation
- if (readFormat.nonEmpty) {
- val unsupportedReadFormatRows = readFormat.map { format =>
- val readFormatAndType = format.split("\\[")
- val readFormat = readFormatAndType(0)
- val readType = if (readFormatAndType.size > 1) {
- s"Types not supported - ${readFormatAndType(1).replace("]", "")}"
- } else {
+ sumInfo.stageInfo.collect {
+ case info if info.unsupportedExecs.nonEmpty =>
+ val stageAppDuration = info.stageWallclockDuration
+ val allUnsupportedExecs = info.unsupportedExecs
+ if (allUnsupportedExecs.nonEmpty) {
+ allUnsupportedExecs.map { unsupportedExecsStr =>
+ // "Ignore Operator" is a boolean flag indicating whether the operator should be
+ // excluded from consideration for GPU acceleration. If the value is true, the
+ // operator is ignored.
+ val ignoreUnsupportedExec = if (
+ IgnoreExecs.getAllIgnoreExecs.contains(unsupportedExecsStr)) {
+ IgnoreExecs.True
+ } else {
+ IgnoreExecs.False
+ }
+ val data = ListBuffer[(String, Int)](
+ reformatCSVFunc(appId) -> headersAndSizes(APP_ID_STR),
+ reformatCSVFunc(unsupportedExecsStr) -> headersAndSizes(UNSUPPORTED_TYPE),
+ info.stageId.toString -> headersAndSizes(STAGE_ID_STR),
+ stageAppDuration.toString -> headersAndSizes(STAGE_WALLCLOCK_DUR_STR),
+ appDuration.toString -> headersAndSizes(APP_DUR_STR),
+ recommendation -> headersAndSizes(SPEEDUP_BUCKET_STR),
+ ignoreUnsupportedExec -> headersAndSizes(IGNORE_OPERATOR)
+ )
+ constructOutputRow(data, delimiter, prettyPrint)
+ }.mkString
+ }
+ else {
""
}
- val data = ListBuffer(
- reformatCSVFunc(appId) -> headersAndSizes(APP_ID_STR),
- reformatCSVFunc("Read")-> headersAndSizes(UNSUPPORTED_TYPE),
- reformatCSVFunc(readFormat) -> headersAndSizes(DETAILS),
- reformatCSVFunc(readType) -> headersAndSizes(NOTES)
- )
- constructOutputRow(data, delimiter, prettyPrint)
+ }
+ }
+
+ def constructUnsupportedOperatorsInfo(
+ sumInfo: QualificationSummaryInfo,
+ headersAndSizes: LinkedHashMap[String, Int],
+ delimiter: String = TEXT_DELIMITER,
+ prettyPrint: Boolean,
+ reformatCSV: Boolean = true): Seq[String] = {
+
+ val reformatCSVFunc: String => String =
+ if (reformatCSV) str => StringUtils.reformatCSVString(str) else str => stringIfempty(str)
+
+ val appId = reformatCSVFunc(sumInfo.appId)
+ val unsupportedOperatorsOutputRows = new ArrayBuffer[String]()
+ val unsupportedExprs = sumInfo.unSupportedExprs
+ val allExecs = getAllExecsFromPlan(sumInfo.planInfo)
+ val dataSetExecs = allExecs.collect { case x if x.dataSet => x.exec }
+ val udfExecs = allExecs.collect { case x if x.udf => x.exec }
+
+ def createUnsupportedRow(exec: String, execType: String, notes: String,
+ ignoreOperator: String = IgnoreExecs.False): String = {
+ val data = ListBuffer(
+ appId -> headersAndSizes(APP_ID_STR),
+ reformatCSVFunc(execType) -> headersAndSizes(UNSUPPORTED_TYPE),
+ reformatCSVFunc(exec) -> headersAndSizes(DETAILS),
+ reformatCSVFunc(notes) -> headersAndSizes(NOTES),
+ reformatCSVFunc(ignoreOperator) -> headersAndSizes(IGNORE_OPERATOR)
+ )
+ constructOutputRow(data, delimiter, prettyPrint)
+ }
+
+ val readFormatRows = sumInfo.readFileFormatAndTypesNotSupported.map { format =>
+ val readFormatAndType = format.split("\\[")
+ val readFormat = readFormatAndType(0)
+ val readType = if (readFormatAndType.size > 1) {
+ s"Types not supported - ${readFormatAndType(1).replace("]", "")}"
+ } else {
+ ""
}
- unsupportedOperatorsOutputRows ++= unsupportedReadFormatRows
+ createUnsupportedRow(readFormat, "Read", readType)
}
- if (unsupportedExecs.nonEmpty) {
- val unsupportedExecRows = unsupportedExecs.split(";").map { exec =>
- val data = ListBuffer(
- reformatCSVFunc(appId) -> headersAndSizes(APP_ID_STR),
- reformatCSVFunc("Exec") -> headersAndSizes(UNSUPPORTED_TYPE),
- reformatCSVFunc(exec) -> headersAndSizes(DETAILS),
- reformatCSVFunc("") -> headersAndSizes(NOTES)
- )
- constructOutputRow(data, delimiter, prettyPrint)
+ unsupportedOperatorsOutputRows ++= readFormatRows
+
+ // Report Execs that are unsupported on their own, as well as Execs that are unsupported
+ // because they contain unsupported expressions, come from a Dataset, or contain a UDF.
+ val unsupportedExecExprsMap = sumInfo.unsupportedExecstoExprsMap
+ val unsupportedExecsSet = sumInfo.unSupportedExecs.split(";").toSet
+ val unsupportedExecsFiltered = unsupportedExecsSet.filterNot(unsupportedExecExprsMap.contains)
+ val actualUnsupportedExecs = unsupportedExecsFiltered.filterNot(x => dataSetExecs.contains(x)
+ || udfExecs.contains(x) || unsupportedExecExprsMap.contains(x))
+ val unsupportedExecRows = actualUnsupportedExecs.map { exec =>
+ // If the exec is in the ignore list, then set the ignore operator to true.
+ if (IgnoreExecs.getAllIgnoreExecs.contains(exec)) {
+ createUnsupportedRow(exec, "Exec", "", IgnoreExecs.True)
+ } else {
+ createUnsupportedRow(exec, "Exec", "", IgnoreExecs.False)
}
- unsupportedOperatorsOutputRows ++= unsupportedExecRows
}
- if (unsupportedExecExprsMap.nonEmpty) {
- val unsupportedExecExprMapRows = unsupportedExecExprsMap.map { case (exec, exprs) =>
- val data = ListBuffer(
- reformatCSVFunc(appId) -> headersAndSizes(APP_ID_STR),
- reformatCSVFunc("Exec") -> headersAndSizes(UNSUPPORTED_TYPE),
- reformatCSVFunc(exec) -> headersAndSizes(DETAILS),
- reformatCSVFunc(s"$exec Exec is not supported as expressions are " +
- s"not supported - `$exprs`") -> headersAndSizes(NOTES)
- )
- constructOutputRow(data, delimiter, prettyPrint)
- }.toArray
- unsupportedOperatorsOutputRows ++= unsupportedExecExprMapRows
+ unsupportedOperatorsOutputRows ++= unsupportedExecRows
+
+ val unsupportedDatasetExecRows = dataSetExecs.map { exec =>
+ createUnsupportedRow(exec, "Exec", s"$exec Exec is not supported as " +
+ s"this operation is from dataset which is not supported")
+ }
+ unsupportedOperatorsOutputRows ++= unsupportedDatasetExecRows
+
+ val unsupportedUdfExecRows = udfExecs.map { exec =>
+ createUnsupportedRow(exec, "Exec", s"$exec Exec is " +
+ s"not supported as it contains UDF which is not supported")
}
+ unsupportedOperatorsOutputRows ++= unsupportedUdfExecRows
+
+ val unsupportedExecExprMapRows = sumInfo.unsupportedExecstoExprsMap.map { case (exec, exprs) =>
+ createUnsupportedRow(exec, "Exec", s"$exec Exec is not" +
+ s" supported as expressions are not supported - `$exprs`")
+ }.toArray
+ unsupportedOperatorsOutputRows ++= unsupportedExecExprMapRows
+
if (unsupportedExprs.nonEmpty) {
- val unsupportedExprRows = unsupportedExprs.split(";").map { expr =>
- val data = ListBuffer(
- reformatCSVFunc(appId) -> headersAndSizes(APP_ID_STR),
- reformatCSVFunc("Expression") -> headersAndSizes(UNSUPPORTED_TYPE),
- reformatCSVFunc(expr) -> headersAndSizes(DETAILS),
- reformatCSVFunc("") -> headersAndSizes(NOTES)
- )
- constructOutputRow(data, delimiter, prettyPrint)
+ val unsupportedExprRows = sumInfo.unSupportedExprs.split(";").map { expr =>
+ createUnsupportedRow(expr, "Expression", "")
}
unsupportedOperatorsOutputRows ++= unsupportedExprRows
}
- if (writeFormat.nonEmpty) {
- val unsupportedwriteFormatRows = writeFormat.map { format =>
- val data = ListBuffer(
- reformatCSVFunc(appId) -> headersAndSizes(APP_ID_STR),
- reformatCSVFunc("Write") -> headersAndSizes(UNSUPPORTED_TYPE),
- reformatCSVFunc(format) -> headersAndSizes(DETAILS),
- reformatCSVFunc("") -> headersAndSizes(NOTES)
- )
- constructOutputRow(data, delimiter, prettyPrint)
- }
- unsupportedOperatorsOutputRows ++= unsupportedwriteFormatRows
+
+ val unsupportedWriteFormatRows = sumInfo.writeDataFormat.map { format =>
+ createUnsupportedRow(format, "Write", "")
}
+ unsupportedOperatorsOutputRows ++= unsupportedWriteFormatRows
+
unsupportedOperatorsOutputRows
}
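To make the new stage-duration report concrete, the snippet below prints one possible header and row using the column order declared in getUnsupportedOperatorsStageDurationsHeaderStringsAndSizes. The App ID, Unsupported Type, Stage Duration, App Duration, Recommendation, and Ignore Operator labels appear in this diff or its tests; the "Stage ID" label and every row value are assumptions made up purely for illustration.

object StageDurationRowExample {
  def main(args: Array[String]): Unit = {
    val header = Seq("App ID", "Unsupported Type", "Stage ID", "Stage Duration",
      "App Duration", "Recommendation", "Ignore Operator")
    // Fabricated example values: a CollectLimit exec in stage 3, flagged as ignorable.
    val row = Seq("app-00000000000000-0000", "CollectLimit", "3", "1200", "45000",
      "Not Recommended", "true")
    println(header.mkString(","))
    println(row.mkString(","))
  }
}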
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala
index e733d73c2..0286cd582 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala
@@ -93,6 +93,7 @@ class Qualification(outputDir: String, numRows: Int, hadoopConf: Configuration,
qWriter.writeExecReport(allAppsSum, order)
qWriter.writeStageReport(allAppsSum, order)
qWriter.writeUnsupportedOperatorsCSVReport(allAppsSum, order)
+ qWriter.writeUnsupportedOperatorsDetailedStageCSVReport(allAppsSum, order)
val appStatusResult = generateStatusSummary(appStatusReporter.asScala.values.toSeq)
qWriter.writeStatusReport(appStatusResult, order)
if (mlOpsEnabled) {
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualificationArgs.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualificationArgs.scala
index d998cb0ff..c72477034 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualificationArgs.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualificationArgs.scala
@@ -15,6 +15,7 @@
*/
package com.nvidia.spark.rapids.tool.qualification
+import com.nvidia.spark.rapids.tool.PlatformNames
import org.rogach.scallop.{ScallopConf, ScallopOption}
import org.rogach.scallop.exceptions.ScallopException
@@ -155,10 +156,9 @@ Usage: java -cp rapids-4-spark-tools_2.12-.jar:$SPARK_HOME/jars/*
val platform: ScallopOption[String] =
opt[String](required = false,
descr = "Cluster platform where Spark CPU workloads were executed. Options include " +
- "onprem, dataproc-t4, dataproc-l4, dataproc-serverless-l4, dataproc-gke-t4, " +
- "dataproc-gke-l4, emr-t4, emr-a10, databricks-aws, and databricks-azure. Default " +
- "is onprem.",
- default = Some("onprem"))
+ s"${PlatformNames.getAllNames.mkString(", ")}. " +
+ s"Default is ${PlatformNames.DEFAULT}.",
+ default = Some(PlatformNames.DEFAULT))
val speedupFactorFile: ScallopOption[String] =
opt[String](required = false,
descr = "Custom speedup factor file used to get estimated GPU speedup that is specific " +
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualificationMain.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualificationMain.scala
index cb8a3c583..454b27695 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualificationMain.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualificationMain.scala
@@ -16,7 +16,7 @@
package com.nvidia.spark.rapids.tool.qualification
-import com.nvidia.spark.rapids.tool.EventLogPathProcessor
+import com.nvidia.spark.rapids.tool.{EventLogPathProcessor, PlatformFactory}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.rapids.tool.AppFilterImpl
@@ -58,14 +58,16 @@ object QualificationMain extends Logging {
val order = appArgs.order.getOrElse("desc")
val uiEnabled = appArgs.htmlReport.getOrElse(false)
val reportSqlLevel = appArgs.perSql.getOrElse(false)
- val platform = appArgs.platform.getOrElse("onprem")
+ val platform = appArgs.platform()
val mlOpsEnabled = appArgs.mlFunctions.getOrElse(false)
val penalizeTransitions = appArgs.penalizeTransitions.getOrElse(true)
val hadoopConf = RapidsToolsConfUtil.newHadoopConf
val pluginTypeChecker = try {
- new PluginTypeChecker(platform, appArgs.speedupFactorFile.toOption)
+ new PluginTypeChecker(
+ PlatformFactory.createInstance(platform),
+ appArgs.speedupFactorFile.toOption)
} catch {
case ie: IllegalStateException =>
logError("Error creating the plugin type checker!", ie)
diff --git a/core/src/main/scala/org/apache/spark/sql/rapids/tool/ToolUtils.scala b/core/src/main/scala/org/apache/spark/sql/rapids/tool/ToolUtils.scala
index 89deb7f1b..faeaf78c2 100644
--- a/core/src/main/scala/org/apache/spark/sql/rapids/tool/ToolUtils.scala
+++ b/core/src/main/scala/org/apache/spark/sql/rapids/tool/ToolUtils.scala
@@ -80,10 +80,14 @@ object ToolUtils extends Logging {
val targetEx = i.getTargetException
if (targetEx != null) {
targetEx match {
- case j: com.fasterxml.jackson.core.JsonParseException =>
+ case j: com.fasterxml.jackson.core.io.JsonEOFException =>
+ // Spark 3.4.1+ embeds JsonEOFException in the InvocationTargetException.
+ // We need to show a warning message instead of failing the entire app.
+ logWarning(s"Incomplete eventlog, ${j.getMessage}")
+ case k: com.fasterxml.jackson.core.JsonParseException =>
// this is a parser error thrown by spark-3.4+ which indicates the log is
// malformed
- throw j
+ throw k
case z: ClassNotFoundException if z.getMessage != null =>
logWarning(s"ClassNotFoundException while parsing an event: ${z.getMessage}")
case t: Throwable =>
@@ -94,10 +98,15 @@ object ToolUtils extends Logging {
// Normally it should not happen that invocation target is null.
logError(s"Unknown exception while parsing an event", i)
}
- case j: com.fasterxml.jackson.core.JsonParseException =>
+ case j: com.fasterxml.jackson.core.io.JsonEOFException =>
+ // Note that JsonEOFException is a child of JsonParseException.
+ // In case the eventlog is incomplete (i.e., in progress), we show a warning message
+ // because we do not want to cause the entire app to fail.
+ logWarning(s"Incomplete eventlog, ${j.getMessage}")
+ case k: com.fasterxml.jackson.core.JsonParseException =>
// this is a parser error thrown by version prior to spark-3.4+ which indicates the
// log is malformed
- throw j
+ throw k
}
None
}
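The ordering of the rewritten match cases matters: JsonEOFException is a subclass of JsonParseException, so the EOF case must be listed first or it would never be reached. The standalone sketch below, which is not part of this PR, reproduces the distinction with a bare Jackson parser.

import com.fasterxml.jackson.core.{JsonFactory, JsonParseException}
import com.fasterxml.jackson.core.io.JsonEOFException

object JsonFailureModes {
  // Classify a JSON snippet the same way the updated match above does:
  // truncated input is only warned about, malformed input is a hard failure.
  def classify(json: String): String = {
    val parser = new JsonFactory().createParser(json)
    try {
      while (parser.nextToken() != null) {}
      "complete"
    } catch {
      // JsonEOFException must precede JsonParseException, as in the match above.
      case _: JsonEOFException => "incomplete (warn only)"
      case _: JsonParseException => "malformed (fail)"
    } finally {
      parser.close()
    }
  }

  def main(args: Array[String]): Unit = {
    println(classify("""{"Event":"SparkListenerEnvironmentUpdate", "x": 1"""))  // incomplete
    println(classify("""{"Event": !!}"""))                                      // malformed
  }
}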
@@ -317,6 +326,37 @@ object SQLMetricsStats {
}
}
+object IgnoreExecs {
+ // AdaptiveSparkPlan is not a real exec. It is a wrapper for the whole plan.
+ private val AdaptiveSparkPlan = "AdaptiveSparkPlan"
+ // Collect Limit replacement can be slower on the GPU. Disabled by default.
+ private val CollectLimit = "CollectLimit"
+ private val ScanExistingRDD = "Scan ExistingRDD"
+ private val ExistingRDD = "ExistingRDD"
+ // Some DDL and table commands that can be ignored
+ private val ExecuteCreateViewCommand = "Execute CreateViewCommand"
+ private val LocalTableScan = "LocalTableScan"
+ private val ExecuteCreateDatabaseCommand = "Execute CreateDatabaseCommand"
+ private val ExecuteDropDatabaseCommand = "Execute DropDatabaseCommand"
+ private val ExecuteCreateTableAsSelectCommand = "Execute CreateTableAsSelectCommand"
+ private val ExecuteCreateTableCommand = "Execute CreateTableCommand"
+ private val ExecuteDropTableCommand = "Execute DropTableCommand"
+ private val ExecuteCreateDataSourceTableAsSelectCommand = "Execute " +
+ "CreateDataSourceTableAsSelectCommand"
+ private val SetCatalogAndNamespace = "SetCatalogAndNamespace"
+ private val ExecuteSetCommand = "Execute SetCommand"
+
+
+ val True = "true"
+ val False = "false"
+
+ def getAllIgnoreExecs: Set[String] = Set(AdaptiveSparkPlan, CollectLimit, ScanExistingRDD,
+ ExecuteCreateViewCommand, ExistingRDD, LocalTableScan, ExecuteCreateTableCommand,
+ ExecuteDropTableCommand, ExecuteCreateDatabaseCommand, ExecuteDropDatabaseCommand,
+ ExecuteCreateTableAsSelectCommand, ExecuteCreateDataSourceTableAsSelectCommand,
+ SetCatalogAndNamespace, ExecuteSetCommand)
+}
+
object MlOps {
val sparkml = "spark.ml."
val xgBoost = "spark.XGBoost"
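Elsewhere in this diff, constructUnsupportedOperatorsInfo uses membership in this set to populate the new "Ignore Operator" column. A minimal, self-contained sketch of that classification, using a hand-copied subset of the names above rather than the real IgnoreExecs object, looks like this:

object IgnoreFlagSketch {
  // Subset of the ignore list above, copied here so the sketch compiles on its own.
  val ignoreExecs: Set[String] = Set("AdaptiveSparkPlan", "CollectLimit", "Execute CreateViewCommand")

  // Mirrors the true/false strings written to the "Ignore Operator" CSV column.
  def ignoreFlag(exec: String): String =
    if (ignoreExecs.contains(exec)) "true" else "false"

  def main(args: Array[String]): Unit = {
    println(ignoreFlag("AdaptiveSparkPlan")) // true: wrapper node, not a real exec
    println(ignoreFlag("SortExec"))          // false: treated as a real unsupported exec
  }
}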
diff --git a/core/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualificationAppInfo.scala b/core/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualificationAppInfo.scala
index 61377d0b3..a58d1e646 100644
--- a/core/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualificationAppInfo.scala
+++ b/core/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualificationAppInfo.scala
@@ -31,7 +31,7 @@ import org.apache.spark.internal.Logging
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.sql.execution.SparkPlanInfo
import org.apache.spark.sql.execution.ui.SparkPlanGraph
-import org.apache.spark.sql.rapids.tool.{AppBase, GpuEventLogException, SupportedMLFuncsName, ToolUtils}
+import org.apache.spark.sql.rapids.tool.{AppBase, GpuEventLogException, IgnoreExecs, SupportedMLFuncsName, ToolUtils}
class QualificationAppInfo(
eventLogInfo: Option[EventLogInfo],
@@ -296,6 +296,7 @@ class QualificationAppInfo(
val allSpeedupFactorAvg = SQLPlanParser.averageSpeedup(execInfos.map(_.speedupFactor))
val allFlattenedExecs = flattenedExecs(execInfos)
val numUnsupported = allFlattenedExecs.filterNot(_.isSupported)
+ val unsupportedExecs = numUnsupported.map(_.exec)
// if we have unsupported try to guess at how much time. For now divide
// time by number of execs and give each one equal weight
val eachExecTime = allStageTaskTime / allFlattenedExecs.size
@@ -339,8 +340,13 @@ class QualificationAppInfo(
eachStageUnsupported
}
+ // Get stage info for the given stageId.
+ val stageInfos = stageIdToInfo.filterKeys { case (id, _) => id == stageId }
+ val wallclockStageDuration = stageInfos.values.map(x => x.duration.getOrElse(0L)).sum
+
StageQualSummaryInfo(stageId, allSpeedupFactorAvg, stageTaskTime,
- finalEachStageUnsupported, numTransitions, transitionsTime, estimated)
+ finalEachStageUnsupported, numTransitions, transitionsTime, estimated,
+ wallclockStageDuration, unsupportedExecs)
}.toSet
}
@@ -459,7 +465,7 @@ class QualificationAppInfo(
c.filterNot(_.shouldRemove)
}
new ExecInfo(e.sqlID, e.exec, e.expr, e.speedupFactor, e.duration,
- e.nodeId, e.isSupported, filteredChildren, e.stages, e.shouldRemove)
+ e.nodeId, e.isSupported, filteredChildren, e.stages, e.shouldRemove, e.unsupportedExprs)
}
val filteredPlanInfos = execFilteredChildren.filterNot(_.shouldRemove)
p.copy(execInfo = filteredPlanInfos)
@@ -586,8 +592,8 @@ class QualificationAppInfo(
e.children.map(x => x.filterNot(_.isSupported))
}.flatten
topLevelExecs ++ childrenExecs
- }.map(_.exec).toSet.mkString(";").trim.replaceAll("\n", "")
- .replace(",", ":")
+ }.map(_.exec).toSet.mkString(";").trim.replaceAll("\n", "").replace(",", ":")
+
// Get all the unsupported Expressions from the plan
val unSupportedExprs = origPlanInfos.map(_.execInfo.flatMap(
_.unsupportedExprs)).flatten.filter(_.nonEmpty).toSet.mkString(";")
@@ -636,6 +642,9 @@ class QualificationAppInfo(
1
}
+ val wallClockSqlDFToUse = QualificationAppInfo.wallClockSqlDataFrameToUse(
+ sparkSQLDFWallClockDuration, appDuration)
+
val estimatedInfo = QualificationAppInfo.calculateEstimatedInfoSummary(estimatedGPURatio,
sparkSQLDFWallClockDuration, appDuration, taskSpeedupFactor, appName, appId,
sqlIdsWithFailures.nonEmpty, mlSpeedup, unSupportedExecs, unSupportedExprs,
@@ -646,8 +655,8 @@ class QualificationAppInfo(
notSupportFormatAndTypesString, getAllReadFileFormats, writeFormat,
allComplexTypes, nestedComplexTypes, longestSQLDuration, sqlDataframeTaskDuration,
nonSQLTaskDuration, unsupportedSQLTaskDuration, supportedSQLTaskDuration,
- taskSpeedupFactor, info.sparkUser, info.startTime, origPlanInfos,
- perSqlStageSummary.map(_.stageSum).flatten, estimatedInfo, perSqlInfos,
+ taskSpeedupFactor, info.sparkUser, info.startTime, wallClockSqlDFToUse,
+ origPlanInfos, perSqlStageSummary.map(_.stageSum).flatten, estimatedInfo, perSqlInfos,
unSupportedExecs, unSupportedExprs, clusterTags, allClusterTagsMap, mlFunctions,
mlTotalStageDuration, unsupportedExecExprsMap)
}
@@ -861,6 +870,7 @@ case class QualificationSummaryInfo(
taskSpeedupFactor: Double,
user: String,
startTime: Long,
+ sparkSqlDFWallClockDuration: Long,
planInfo: Seq[PlanInfo],
stageInfo: Seq[StageQualSummaryInfo],
estimatedInfo: EstimatedAppInfo,
@@ -881,7 +891,9 @@ case class StageQualSummaryInfo(
unsupportedTaskDur: Long,
numTransitions: Int,
transitionTime: Long,
- estimated: Boolean = false)
+ estimated: Boolean = false,
+ stageWallclockDuration: Long = 0,
+ unsupportedExecs: Seq[String] = Seq.empty)
object QualificationAppInfo extends Logging {
// define recommendation constants
@@ -926,19 +938,19 @@ object QualificationAppInfo extends Logging {
}
}
+ def wallClockSqlDataFrameToUse(sqlDataFrameDuration: Long, appDuration: Long): Long = {
+ // If our app duration is shorter than our sql duration, estimate the sql duration down
+ // to app duration
+ math.min(sqlDataFrameDuration, appDuration)
+ }
+
// Summarize and estimate based on wall clock times
def calculateEstimatedInfoSummary(estimatedRatio: Double, sqlDataFrameDuration: Long,
appDuration: Long, sqlSpeedupFactor: Double, appName: String, appId: String,
hasFailures: Boolean, mlSpeedupFactor: Option[MLFuncsSpeedupAndDuration] = None,
unsupportedExecs: String = "", unsupportedExprs: String = "",
allClusterTagsMap: Map[String, String] = Map.empty[String, String]): EstimatedAppInfo = {
- val sqlDataFrameDurationToUse = if (sqlDataFrameDuration > appDuration) {
- // our app duration is shorter then our sql duration, estimate the sql duration down
- // to app duration
- appDuration
- } else {
- sqlDataFrameDuration
- }
+ val sqlDataFrameDurationToUse = wallClockSqlDataFrameToUse(sqlDataFrameDuration, appDuration)
// get the average speedup and duration for ML funcs supported on GPU
val (mlSpeedup, mlDuration) = if (mlSpeedupFactor.isDefined) {
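The new wallClockSqlDataFrameToUse helper simply clamps the SQL DataFrame wall-clock duration to the application duration. A tiny standalone illustration, duplicating the one-line helper so it runs on its own, follows.

object WallClockClampExample {
  // Same logic as QualificationAppInfo.wallClockSqlDataFrameToUse in this diff.
  def wallClockSqlDataFrameToUse(sqlDataFrameDuration: Long, appDuration: Long): Long =
    math.min(sqlDataFrameDuration, appDuration)

  def main(args: Array[String]): Unit = {
    assert(wallClockSqlDataFrameToUse(120000L, 90000L) == 90000L) // capped at the app duration
    assert(wallClockSqlDataFrameToUse(60000L, 90000L) == 60000L)  // left unchanged
  }
}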
diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/profiling/AutoTunerSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/profiling/AutoTunerSuite.scala
index 36831386b..755c4f6c8 100644
--- a/core/src/test/scala/com/nvidia/spark/rapids/tool/profiling/AutoTunerSuite.scala
+++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/profiling/AutoTunerSuite.scala
@@ -21,6 +21,7 @@ import java.util
import scala.collection.JavaConverters._
import scala.collection.mutable
+import com.nvidia.spark.rapids.tool.{PlatformFactory, PlatformNames}
import org.scalatest.{BeforeAndAfterEach, FunSuite}
import org.scalatest.Matchers.convertToAnyShouldWrapper
import org.yaml.snakeyaml.{DumperOptions, Yaml}
@@ -1283,16 +1284,17 @@ class AutoTunerSuite extends FunSuite with BeforeAndAfterEach with Logging {
assert(expectedResults == autoTunerOutput)
}
- test("test recommendations for databricks platform argument") {
+ test("test recommendations for databricks-aws platform argument") {
val databricksWorkerInfo = buildWorkerInfoAsString()
+ val platform = PlatformFactory.createInstance(PlatformNames.DATABRICKS_AWS)
val autoTuner = AutoTuner.buildAutoTunerFromProps(databricksWorkerInfo,
- getGpuAppMockInfoProvider, "databricks")
+ getGpuAppMockInfoProvider, platform)
val (properties, comments) = autoTuner.getRecommendedProperties()
// Assert recommendations are excluded in properties
- assert(properties.map(_.property).forall(autoTuner.selectedPlatform.isValidRecommendation))
+ assert(properties.map(_.property).forall(autoTuner.platform.isValidRecommendation))
// Assert recommendations are skipped in comments
- assert(comments.map(_.comment).forall(autoTuner.selectedPlatform.isValidComment))
+ assert(comments.map(_.comment).forall(autoTuner.platform.isValidComment))
}
// When spark is running as a standalone, the memoryOverhead should not be listed as a
@@ -1357,4 +1359,152 @@ class AutoTunerSuite extends FunSuite with BeforeAndAfterEach with Logging {
// scalastyle:on line.size.limit
assert(expectedResults == autoTunerOutput)
}
+
+ test("Recommendations generated for unsupported operators from driver logs only") {
+ val customProps = mutable.LinkedHashMap(
+ "spark.executor.cores" -> "8",
+ "spark.executor.memory" -> "47222m",
+ "spark.rapids.sql.concurrentGpuTasks" -> "2",
+ "spark.task.resource.gpu.amount" -> "0.0625")
+ val unsupportedDriverOperators = Seq(
+ DriverLogUnsupportedOperators(
+ "FromUnixTime", 1,
+ "Only UTC zone id is supported. Actual default zone id: America/Los_Angeles; " +
+ "CORRECTED format 'yyyyMMdd' on the GPU is not guaranteed to produce the same " +
+ "results as Spark on CPU. Set spark.rapids.sql.incompatibleDateFormats.enabled=true " +
+ "to force onto GPU.")
+ )
+ val workerInfo = buildWorkerInfoAsString(Some(customProps))
+ val autoTuner: AutoTuner = AutoTuner.buildAutoTunerFromProps(workerInfo, null,
+ PlatformFactory.createInstance(), unsupportedDriverOperators)
+ val (properties, comments) = autoTuner.getRecommendedProperties()
+ val autoTunerOutput = Profiler.getAutoTunerResultsAsString(properties, comments)
+ // scalastyle:off line.size.limit
+ val expectedResults =
+ s"""|
+ |Spark Properties:
+ |--conf spark.rapids.sql.incompatibleDateFormats.enabled=true
+ |
+ |Comments:
+ |- 'spark.rapids.sql.incompatibleDateFormats.enabled' was not set.
+ |- AutoTuner recommendations only support eventlogs generated by Spark applications utilizing RAPIDS Accelerator for Apache Spark
+ |- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
+ |- ${AutoTuner.commentForExperimentalConfig("spark.rapids.sql.incompatibleDateFormats.enabled")}
+ |""".stripMargin
+ // scalastyle:on line.size.limit
+ assert(expectedResults == autoTunerOutput)
+ }
+
+ test("Recommendations generated for unsupported operators from driver and event logs") {
+ val customProps = mutable.LinkedHashMap(
+ "spark.executor.cores" -> "8",
+ "spark.executor.memory" -> "47222m",
+ "spark.rapids.sql.concurrentGpuTasks" -> "2",
+ "spark.task.resource.gpu.amount" -> "0.0625")
+ val unsupportedDriverOperators = Seq(
+ DriverLogUnsupportedOperators(
+ "FromUnixTime", 1,
+ "Only UTC zone id is supported. Actual default zone id: America/Los_Angeles; " +
+ "CORRECTED format 'yyyyMMdd' on the GPU is not guaranteed to produce the same " +
+ "results as Spark on CPU. Set spark.rapids.sql.incompatibleDateFormats.enabled=true " +
+ "to force onto GPU.")
+ )
+ val workerInfo = buildWorkerInfoAsString(Some(customProps))
+ val autoTuner: AutoTuner = AutoTuner.buildAutoTunerFromProps(workerInfo,
+ getGpuAppMockInfoProvider, PlatformFactory.createInstance(), unsupportedDriverOperators)
+ val (properties, comments) = autoTuner.getRecommendedProperties()
+ val autoTunerOutput = Profiler.getAutoTunerResultsAsString(properties, comments)
+ // scalastyle:off line.size.limit
+ val expectedResults =
+ s"""|
+ |Spark Properties:
+ |--conf spark.executor.cores=16
+ |--conf spark.executor.instances=8
+ |--conf spark.executor.memory=32768m
+ |--conf spark.executor.memoryOverhead=8396m
+ |--conf spark.rapids.memory.pinnedPool.size=4096m
+ |--conf spark.rapids.shuffle.multiThreaded.reader.threads=16
+ |--conf spark.rapids.shuffle.multiThreaded.writer.threads=16
+ |--conf spark.rapids.sql.incompatibleDateFormats.enabled=true
+ |--conf spark.rapids.sql.multiThreadedRead.numThreads=20
+ |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark311.RapidsShuffleManager
+ |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
+ |--conf spark.sql.adaptive.coalescePartitions.minPartitionNum=128
+ |--conf spark.sql.files.maxPartitionBytes=512m
+ |--conf spark.sql.shuffle.partitions=200
+ |
+ |Comments:
+ |- 'spark.executor.instances' was not set.
+ |- 'spark.executor.memoryOverhead' was not set.
+ |- 'spark.rapids.memory.pinnedPool.size' was not set.
+ |- 'spark.rapids.shuffle.multiThreaded.reader.threads' was not set.
+ |- 'spark.rapids.shuffle.multiThreaded.writer.threads' was not set.
+ |- 'spark.rapids.sql.incompatibleDateFormats.enabled' was not set.
+ |- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
+ |- 'spark.shuffle.manager' was not set.
+ |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
+ |- 'spark.sql.adaptive.coalescePartitions.minPartitionNum' was not set.
+ |- 'spark.sql.adaptive.enabled' should be enabled for better performance.
+ |- 'spark.sql.files.maxPartitionBytes' was not set.
+ |- 'spark.sql.shuffle.partitions' was not set.
+ |- ${AutoTuner.classPathComments("rapids.jars.missing")}
+ |- ${AutoTuner.classPathComments("rapids.shuffle.jars")}
+ |- ${AutoTuner.commentForExperimentalConfig("spark.rapids.sql.incompatibleDateFormats.enabled")}
+ |""".stripMargin
+ // scalastyle:on line.size.limit
+ assert(expectedResults == autoTunerOutput)
+ }
+
+
+ test("Recommendations generated for empty unsupported operators from driver logs only") {
+ val customProps = mutable.LinkedHashMap(
+ "spark.executor.cores" -> "8",
+ "spark.executor.memory" -> "47222m",
+ "spark.rapids.sql.concurrentGpuTasks" -> "2",
+ "spark.task.resource.gpu.amount" -> "0.0625")
+ val workerInfo = buildWorkerInfoAsString(Some(customProps))
+ val autoTuner: AutoTuner = AutoTuner.buildAutoTunerFromProps(workerInfo, null,
+ PlatformFactory.createInstance(), Seq.empty)
+ val (properties, comments) = autoTuner.getRecommendedProperties()
+ val autoTunerOutput = Profiler.getAutoTunerResultsAsString(properties, comments)
+ // scalastyle:off line.size.limit
+ val expectedResults =
+ s"""|Cannot recommend properties. See Comments.
+ |
+ |Comments:
+ |- AutoTuner recommendations only support eventlogs generated by Spark applications utilizing RAPIDS Accelerator for Apache Spark
+ |- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
+ |""".stripMargin
+ // scalastyle:on line.size.limit
+ assert(expectedResults == autoTunerOutput)
+ }
+
+ test("Recommendations not generated for unsupported operators from driver logs") {
+ // This test does not generate any recommendations for the unsupported operator 'Literal'
+ val customProps = mutable.LinkedHashMap(
+ "spark.executor.cores" -> "8",
+ "spark.executor.memory" -> "47222m",
+ "spark.rapids.sql.concurrentGpuTasks" -> "2",
+ "spark.task.resource.gpu.amount" -> "0.0625")
+ val unsupportedDriverOperators = Seq(
+ DriverLogUnsupportedOperators(
+ "Literal", 3,
+ "expression Literal 1700518632630000 produces an unsupported type TimestampType")
+ )
+ val workerInfo = buildWorkerInfoAsString(Some(customProps))
+ val autoTuner: AutoTuner = AutoTuner.buildAutoTunerFromProps(workerInfo, null,
+ PlatformFactory.createInstance(), unsupportedDriverOperators)
+ val (properties, comments) = autoTuner.getRecommendedProperties()
+ val autoTunerOutput = Profiler.getAutoTunerResultsAsString(properties, comments)
+ // scalastyle:off line.size.limit
+ val expectedResults =
+ s"""|Cannot recommend properties. See Comments.
+ |
+ |Comments:
+ |- AutoTuner recommendations only support eventlogs generated by Spark applications utilizing RAPIDS Accelerator for Apache Spark
+ |- RAPIDS Accelerator for Apache Spark jar is missing in "spark.plugins". Please refer to https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html
+ |""".stripMargin
+ // scalastyle:on line.size.limit
+ assert(expectedResults == autoTunerOutput)
+ }
}
diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeCheckerSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeCheckerSuite.scala
index 61e8acf40..9a3640986 100644
--- a/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeCheckerSuite.scala
+++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeCheckerSuite.scala
@@ -19,7 +19,7 @@ package com.nvidia.spark.rapids.tool.qualification
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}
-import com.nvidia.spark.rapids.tool.ToolTestUtils
+import com.nvidia.spark.rapids.tool.{PlatformFactory, PlatformNames, ToolTestUtils}
import com.nvidia.spark.rapids.tool.planparser.DataWritingCommandExecParser
import org.scalatest.FunSuite
@@ -153,68 +153,33 @@ class PluginTypeCheckerSuite extends FunSuite with Logging {
assert(result(2) == "ORC")
}
- test("supported operator score from onprem") {
- val checker = new PluginTypeChecker("onprem")
- assert(checker.getSpeedupFactor("UnionExec") == 3.0)
- assert(checker.getSpeedupFactor("Ceil") == 4)
- }
-
- test("supported operator score from dataproc-t4") {
- val checker = new PluginTypeChecker("dataproc-t4")
- assert(checker.getSpeedupFactor("UnionExec") == 4.88)
- assert(checker.getSpeedupFactor("Ceil") == 4.88)
- }
-
- test("supported operator score from emr-t4") {
- val checker = new PluginTypeChecker("emr-t4")
- assert(checker.getSpeedupFactor("UnionExec") == 2.07)
- assert(checker.getSpeedupFactor("Ceil") == 2.07)
- }
-
- test("supported operator score from databricks-aws") {
- val checker = new PluginTypeChecker("databricks-aws")
- assert(checker.getSpeedupFactor("UnionExec") == 2.45)
- assert(checker.getSpeedupFactor("Ceil") == 2.45)
- }
-
- test("supported operator score from databricks-azure") {
- val checker = new PluginTypeChecker("databricks-azure")
- assert(checker.getSpeedupFactor("UnionExec") == 2.73)
- assert(checker.getSpeedupFactor("Ceil") == 2.73)
- }
-
- test("supported operator score from dataproc-serverless-l4") {
- val checker = new PluginTypeChecker("dataproc-serverless-l4")
- assert(checker.getSpeedupFactor("WindowExec") == 4.25)
- assert(checker.getSpeedupFactor("Ceil") == 4.25)
- }
-
- test("supported operator score from dataproc-l4") {
- val checker = new PluginTypeChecker("dataproc-l4")
- assert(checker.getSpeedupFactor("UnionExec") == 4.16)
- assert(checker.getSpeedupFactor("Ceil") == 4.16)
- }
-
- test("supported operator score from dataproc-gke-t4") {
- val checker = new PluginTypeChecker("dataproc-gke-t4")
- assert(checker.getSpeedupFactor("WindowExec") == 3.65)
- assert(checker.getSpeedupFactor("Ceil") == 3.65)
- }
-
- test("supported operator score from dataproc-gke-l4") {
- val checker = new PluginTypeChecker("dataproc-gke-l4")
- assert(checker.getSpeedupFactor("WindowExec") == 3.74)
- assert(checker.getSpeedupFactor("Ceil") == 3.74)
- }
-
- test("supported operator score from emr-a10") {
- val checker = new PluginTypeChecker("emr-a10")
- assert(checker.getSpeedupFactor("UnionExec") == 2.59)
- assert(checker.getSpeedupFactor("Ceil") == 2.59)
+ val platformSpeedupEntries: Seq[(String, Map[String, Double])] = Seq(
+ (PlatformNames.ONPREM, Map("UnionExec" -> 3.0, "Ceil" -> 4.0)),
+ (PlatformNames.DATAPROC_T4, Map("UnionExec" -> 4.88, "Ceil" -> 4.88)),
+ (PlatformNames.EMR_T4, Map("UnionExec" -> 2.07, "Ceil" -> 2.07)),
+ (PlatformNames.DATABRICKS_AWS, Map("UnionExec" -> 2.45, "Ceil" -> 2.45)),
+ (PlatformNames.DATABRICKS_AZURE, Map("UnionExec" -> 2.73, "Ceil" -> 2.73)),
+ (PlatformNames.DATAPROC_SL_L4, Map("WindowExec" -> 4.25, "Ceil" -> 4.25)),
+ (PlatformNames.DATAPROC_L4, Map("UnionExec" -> 4.16, "Ceil" -> 4.16)),
+ (PlatformNames.DATAPROC_GKE_T4, Map("WindowExec" -> 3.65, "Ceil" -> 3.65)),
+ (PlatformNames.DATAPROC_GKE_L4, Map("WindowExec" -> 3.74, "Ceil" -> 3.74)),
+ (PlatformNames.EMR_A10, Map("UnionExec" -> 2.59, "Ceil" -> 2.59))
+ )
+
+ platformSpeedupEntries.foreach { case (platformName, speedupMap) =>
+ test(s"supported operator score from $platformName") {
+ val platform = PlatformFactory.createInstance(platformName)
+ val checker = new PluginTypeChecker(platform)
+ speedupMap.foreach { case (operator, speedup) =>
+ assert(checker.getSpeedupFactor(operator) == speedup)
+ }
+ }
}
test("supported operator score from custom speedup factor file") {
- val speedupFactorFile = ToolTestUtils.getTestResourcePath("operatorsScore-databricks-azure.csv")
+ // Use the Databricks Azure speedup factor file as the custom file
+ val platform = PlatformFactory.createInstance(PlatformNames.DATABRICKS_AZURE)
+ val speedupFactorFile = ToolTestUtils.getTestResourcePath(platform.getOperatorScoreFile)
val checker = new PluginTypeChecker(speedupFactorFile=Some(speedupFactorFile))
assert(checker.getSpeedupFactor("SortExec") == 13.11)
assert(checker.getSpeedupFactor("FilterExec") == 3.14)
diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala
index 307dcb00a..447c203c9 100644
--- a/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala
+++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala
@@ -16,14 +16,14 @@
package com.nvidia.spark.rapids.tool.qualification
-import java.io.File
+import java.io.{File, PrintWriter}
import java.util.concurrent.TimeUnit.NANOSECONDS
import scala.collection.mutable.{ArrayBuffer, ListBuffer}
import scala.io.Source
import com.nvidia.spark.rapids.BaseTestSuite
-import com.nvidia.spark.rapids.tool.{EventLogPathProcessor, StatusReportCounts, ToolTestUtils}
+import com.nvidia.spark.rapids.tool.{EventLogPathProcessor, PlatformNames, StatusReportCounts, ToolTestUtils}
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.ml.feature.PCA
@@ -460,6 +460,67 @@ class QualificationSuite extends BaseTestSuite {
runQualificationTest(logFiles, "nds_q86_test_expectation.csv", expectedStatus = expectedStatus)
}
+ test("incomplete json file does not cause entire app to fail") {
+ // The purpose of this test is to make sure that the app is not skipped when the JSON parser
+ // encounters an unexpected EOF.
+ // There are two cases to evaluate:
+ // 1- An eventlog of a completed application whose final record is truncated (incorrect EOF)
+ // 2- An eventlog of an unfinished app (missing SparkListenerApplicationEnd)
+
+ TrampolineUtil.withTempDir { eventLogDir =>
+ // generate the original eventlog
+ val (eventLog, _) = ToolTestUtils.generateEventLog(eventLogDir,
+ "WholeStageFilterProject") { spark =>
+ import spark.implicits._
+ val df = spark.sparkContext.makeRDD(1 to 100, 3).toDF
+ val df2 = spark.sparkContext.makeRDD(1 to 100, 3).toDF
+ df.select($"value" as "a")
+ .join(df2.select($"value" as "b"), $"a" === $"b")
+ .filter("(((b < 100) AND (a > 50)) OR (a = 0))")
+ .sort($"b")
+ }
+ // create the following files:
+ // 1- inprogress eventlog that does not contain "SparkListenerApplicationEnd" (unfinished)
+ // 2- inprogress eventlog with a terminated app (incomplete)
+ val unfinishedLog = new File(s"$eventLogDir/unfinished.inprogress")
+ val incompleteLog = new File(s"$eventLogDir/eventlog.inprogress")
+ val pwList = Array(new PrintWriter(unfinishedLog), new PrintWriter(incompleteLog))
+ val bufferedSource = Source.fromFile(eventLog)
+ try {
+ val allEventLines = bufferedSource.getLines.toList
+ val selectedLines: List[String] = allEventLines.dropRight(1)
+ selectedLines.foreach { line =>
+ pwList.foreach(pw => pw.println(line))
+ }
+ // add the "SparkListenerApplicationEnd" to the incompleteLog
+ pwList(1).println(allEventLines.last)
+ pwList.foreach( pw =>
+ pw.print("{\"Event\":\"SparkListenerEnvironmentUpdate\"," +
+ "\"JVM Information\":{\"Java Home:")
+ )
+ } finally {
+ bufferedSource.close()
+ pwList.foreach(pw => pw.close())
+ }
+ // All the eventlogs should be parsed successfully
+ // Status counts: 3 SUCCESS, 0 FAILURE, 0 UNKNOWN
+ val logFiles = Array(eventLog, incompleteLog.getAbsolutePath, unfinishedLog.getAbsolutePath)
+ // test Qualification
+ val outpath = new File(s"$eventLogDir/output_folder")
+ val allArgs = Array(
+ "--output-directory",
+ outpath.getAbsolutePath())
+
+ val appArgs = new QualificationArgs(allArgs ++ logFiles)
+ val (exit, appSum) = QualificationMain.mainInternal(appArgs)
+ assert(exit == 0)
+ assert(appSum.size == 3)
+ // test Profiler
+ val apps = ToolTestUtils.processProfileApps(logFiles, sparkSession)
+ assert(apps.size == 3)
+ }
+ }
+
test("spark2 eventlog") {
val profileLogDir = ToolTestUtils.getTestResourcePath("spark-events-profiling")
val log = s"$profileLogDir/spark2-eventlog.zstd"
@@ -1150,7 +1211,10 @@ class QualificationSuite extends BaseTestSuite {
val filename = s"$outpath/rapids_4_spark_qualification_output/" +
s"rapids_4_spark_qualification_output_unsupportedOperators.csv"
+ val stageDurationFile = s"$outpath/rapids_4_spark_qualification_output/" +
+ s"rapids_4_spark_qualification_output_unsupportedOperatorsStageDuration.csv"
val inputSource = Source.fromFile(filename)
+ val unsupportedStageDuration = Source.fromFile(stageDurationFile)
try {
val lines = inputSource.getLines.toSeq
// 1 for header, 1 for values
@@ -1166,6 +1230,11 @@ class QualificationSuite extends BaseTestSuite {
assert(lines.size == expLinesSize)
assert(lines.head.contains("App ID,Unsupported Type,"))
assert(lines(1).contains("\"Read\",\"JSON\",\"Types not supported - bigint:int\""))
+
+ val stageDurationLines = unsupportedStageDuration.getLines.toSeq
+ assert(stageDurationLines.head.contains(
+ "Stage Duration,App Duration,Recommendation"))
+ assert(stageDurationLines(1).contains("Not Recommended"))
} finally {
inputSource.close()
+ unsupportedStageDuration.close()
}
@@ -1337,292 +1406,29 @@ class QualificationSuite extends BaseTestSuite {
spark.sql("SELECT id, hour(current_timestamp()), second(to_timestamp(timestamp)) FROM t1")
}
- // run the qualification tool for onprem
- TrampolineUtil.withTempDir { outpath =>
- val appArgs = new QualificationArgs(Array(
- "--output-directory",
- outpath.getAbsolutePath,
- "--platform",
- "onprem",
- eventLog))
-
- val (exit, sumInfo) =
- QualificationMain.mainInternal(appArgs)
- assert(exit == 0)
-
- // the code above that runs the Spark query stops the Sparksession
- // so create a new one to read in the csv file
- createSparkSession()
-
- // validate that the SQL description in the csv file escapes commas properly
- val outputResults = s"$outpath/rapids_4_spark_qualification_output/" +
- s"rapids_4_spark_qualification_output.csv"
- val outputActual = readExpectedFile(new File(outputResults))
- assert(outputActual.collect().size == 1)
- }
-
- // run the qualification tool for emr. It should default to emr-t4.
- TrampolineUtil.withTempDir { outpath =>
- val appArgs = new QualificationArgs(Array(
- "--output-directory",
- outpath.getAbsolutePath,
- "--platform",
- "emr",
- eventLog))
-
- val (exit, sumInfo) =
- QualificationMain.mainInternal(appArgs)
- assert(exit == 0)
-
- // the code above that runs the Spark query stops the Sparksession
- // so create a new one to read in the csv file
- createSparkSession()
-
- // validate that the SQL description in the csv file escapes commas properly
- val outputResults = s"$outpath/rapids_4_spark_qualification_output/" +
- s"rapids_4_spark_qualification_output.csv"
- val outputActual = readExpectedFile(new File(outputResults))
- assert(outputActual.collect().size == 1)
- }
-
- // run the qualification tool for emr-t4
- TrampolineUtil.withTempDir { outpath =>
- val appArgs = new QualificationArgs(Array(
- "--output-directory",
- outpath.getAbsolutePath,
- "--platform",
- "emr-t4",
- eventLog))
-
- val (exit, sumInfo) =
- QualificationMain.mainInternal(appArgs)
- assert(exit == 0)
-
- // the code above that runs the Spark query stops the Sparksession
- // so create a new one to read in the csv file
- createSparkSession()
-
- // validate that the SQL description in the csv file escapes commas properly
- val outputResults = s"$outpath/rapids_4_spark_qualification_output/" +
- s"rapids_4_spark_qualification_output.csv"
- val outputActual = readExpectedFile(new File(outputResults))
- assert(outputActual.collect().size == 1)
- }
-
- // run the qualification tool for emr-a10
- TrampolineUtil.withTempDir { outpath =>
- val appArgs = new QualificationArgs(Array(
- "--output-directory",
- outpath.getAbsolutePath,
- "--platform",
- "emr-a10",
- eventLog))
-
- val (exit, sumInfo) =
- QualificationMain.mainInternal(appArgs)
- assert(exit == 0)
-
- // the code above that runs the Spark query stops the Sparksession
- // so create a new one to read in the csv file
- createSparkSession()
-
- // validate that the SQL description in the csv file escapes commas properly
- val outputResults = s"$outpath/rapids_4_spark_qualification_output/" +
- s"rapids_4_spark_qualification_output.csv"
- val outputActual = readExpectedFile(new File(outputResults))
- assert(outputActual.collect().size == 1)
- }
-
- // run the qualification tool for dataproc. It should default to dataproc-t4
- TrampolineUtil.withTempDir { outpath =>
- val appArgs = new QualificationArgs(Array(
- "--output-directory",
- outpath.getAbsolutePath,
- "--platform",
- "dataproc",
- eventLog))
-
- val (exit, sumInfo) =
- QualificationMain.mainInternal(appArgs)
- assert(exit == 0)
-
- // the code above that runs the Spark query stops the Sparksession
- // so create a new one to read in the csv file
- createSparkSession()
-
- // validate that the SQL description in the csv file escapes commas properly
- val outputResults = s"$outpath/rapids_4_spark_qualification_output/" +
- s"rapids_4_spark_qualification_output.csv"
- val outputActual = readExpectedFile(new File(outputResults))
- assert(outputActual.collect().size == 1)
- }
-
- // run the qualification tool for dataproc-t4
- TrampolineUtil.withTempDir { outpath =>
- val appArgs = new QualificationArgs(Array(
- "--output-directory",
- outpath.getAbsolutePath,
- "--platform",
- "dataproc-t4",
- eventLog))
-
- val (exit, sumInfo) =
- QualificationMain.mainInternal(appArgs)
- assert(exit == 0)
-
- // the code above that runs the Spark query stops the Sparksession
- // so create a new one to read in the csv file
- createSparkSession()
-
- // validate that the SQL description in the csv file escapes commas properly
- val outputResults = s"$outpath/rapids_4_spark_qualification_output/" +
- s"rapids_4_spark_qualification_output.csv"
- val outputActual = readExpectedFile(new File(outputResults))
- assert(outputActual.collect().size == 1)
- }
-
- // run the qualification tool for dataproc-l4
- TrampolineUtil.withTempDir { outpath =>
- val appArgs = new QualificationArgs(Array(
- "--output-directory",
- outpath.getAbsolutePath,
- "--platform",
- "dataproc-l4",
- eventLog))
-
- val (exit, sumInfo) =
- QualificationMain.mainInternal(appArgs)
- assert(exit == 0)
-
- // the code above that runs the Spark query stops the Sparksession
- // so create a new one to read in the csv file
- createSparkSession()
-
- // validate that the SQL description in the csv file escapes commas properly
- val outputResults = s"$outpath/rapids_4_spark_qualification_output/" +
- s"rapids_4_spark_qualification_output.csv"
- val outputActual = readExpectedFile(new File(outputResults))
- assert(outputActual.collect().size == 1)
- }
-
- // run the qualification tool for dataproc-serverless-l4
- TrampolineUtil.withTempDir { outpath =>
- val appArgs = new QualificationArgs(Array(
- "--output-directory",
- outpath.getAbsolutePath,
- "--platform",
- "dataproc-serverless-l4",
- eventLog))
-
- val (exit, sumInfo) =
- QualificationMain.mainInternal(appArgs)
- assert(exit == 0)
-
- // the code above that runs the Spark query stops the Sparksession
- // so create a new one to read in the csv file
- createSparkSession()
-
- // validate that the SQL description in the csv file escapes commas properly
- val outputResults = s"$outpath/rapids_4_spark_qualification_output/" +
- s"rapids_4_spark_qualification_output.csv"
- val outputActual = readExpectedFile(new File(outputResults))
- assert(outputActual.collect().size == 1)
- }
-
- // run the qualification tool for dataproc-gke-t4
- TrampolineUtil.withTempDir { outpath =>
- val appArgs = new QualificationArgs(Array(
- "--output-directory",
- outpath.getAbsolutePath,
- "--platform",
- "dataproc-gke-t4",
- eventLog))
-
- val (exit, _) =
- QualificationMain.mainInternal(appArgs)
- assert(exit == 0)
-
- // the code above that runs the Spark query stops the Sparksession
- // so create a new one to read in the csv file
- createSparkSession()
-
- // validate that the SQL description in the csv file escapes commas properly
- val outputResults = s"$outpath/rapids_4_spark_qualification_output/" +
- s"rapids_4_spark_qualification_output.csv"
- val outputActual = readExpectedFile(new File(outputResults))
- assert(outputActual.collect().size == 1)
- }
-
- // run the qualification tool for dataproc-gke-l4
- TrampolineUtil.withTempDir { outpath =>
- val appArgs = new QualificationArgs(Array(
- "--output-directory",
- outpath.getAbsolutePath,
- "--platform",
- "dataproc-gke-l4",
- eventLog))
-
- val (exit, _) =
- QualificationMain.mainInternal(appArgs)
- assert(exit == 0)
-
- // the code above that runs the Spark query stops the Sparksession
- // so create a new one to read in the csv file
- createSparkSession()
-
- // validate that the SQL description in the csv file escapes commas properly
- val outputResults = s"$outpath/rapids_4_spark_qualification_output/" +
- s"rapids_4_spark_qualification_output.csv"
- val outputActual = readExpectedFile(new File(outputResults))
- assert(outputActual.collect().size == 1)
- }
-
- // run the qualification tool for databricks-aws
- TrampolineUtil.withTempDir { outpath =>
- val appArgs = new QualificationArgs(Array(
- "--output-directory",
- outpath.getAbsolutePath,
- "--platform",
- "databricks-aws",
- eventLog))
-
- val (exit, sumInfo) =
- QualificationMain.mainInternal(appArgs)
- assert(exit == 0)
-
- // the code above that runs the Spark query stops the Sparksession
- // so create a new one to read in the csv file
- createSparkSession()
-
- // validate that the SQL description in the csv file escapes commas properly
- val outputResults = s"$outpath/rapids_4_spark_qualification_output/" +
- s"rapids_4_spark_qualification_output.csv"
- val outputActual = readExpectedFile(new File(outputResults))
- assert(outputActual.collect().size == 1)
- }
-
- // run the qualification tool for databricks-azure
- TrampolineUtil.withTempDir { outpath =>
- val appArgs = new QualificationArgs(Array(
- "--output-directory",
- outpath.getAbsolutePath,
- "--platform",
- "databricks-azure",
- eventLog))
+ PlatformNames.getAllNames.foreach { platform =>
+ // run the qualification tool for each platform
+ TrampolineUtil.withTempDir { outPath =>
+ val appArgs = new QualificationArgs(Array(
+ "--output-directory",
+ outPath.getAbsolutePath,
+ "--platform",
+ platform,
+ eventLog))
- val (exit, sumInfo) =
- QualificationMain.mainInternal(appArgs)
- assert(exit == 0)
+ val (exit, _) = QualificationMain.mainInternal(appArgs)
+ assert(exit == 0)
- // the code above that runs the Spark query stops the Sparksession
- // so create a new one to read in the csv file
- createSparkSession()
+ // the code above that runs the Spark query stops the Spark Session,
+ // so create a new one to read in the csv file
+ createSparkSession()
- // validate that the SQL description in the csv file escapes commas properly
- val outputResults = s"$outpath/rapids_4_spark_qualification_output/" +
- s"rapids_4_spark_qualification_output.csv"
- val outputActual = readExpectedFile(new File(outputResults))
- assert(outputActual.collect().size == 1)
+ // validate that the SQL description in the csv file escapes commas properly
+ val outputResults = s"$outPath/rapids_4_spark_qualification_output/" +
+ s"rapids_4_spark_qualification_output.csv"
+ val outputActual = readExpectedFile(new File(outputResults))
+ assert(outputActual.collect().length == 1)
+ }
}
}
}
diff --git a/user_tools/custom_speedup_factors/operatorsList.csv b/user_tools/custom_speedup_factors/operatorsList.csv
index 7327d8054..986b88ed7 100644
--- a/user_tools/custom_speedup_factors/operatorsList.csv
+++ b/user_tools/custom_speedup_factors/operatorsList.csv
@@ -98,6 +98,7 @@ Expm1
First
Flatten
Floor
+FormatNumber
FromUTCTimestamp
FromUnixTime
GetArrayItem
@@ -168,6 +169,7 @@ Not
NthValue
OctetLength
Or
+Percentile
PercentRank
PivotFirst
Pmod
@@ -207,6 +209,7 @@ SortOrder
SparkPartitionID
SpecifiedWindowFrame
Sqrt
+Stack
StartsWith
StddevPop
StddevSamp
@@ -222,6 +225,7 @@ StringTranslate
StringTrim
StringTrimLeft
StringTrimRight
+StructsToJson
Substring
SubstringIndex
Subtract
diff --git a/user_tools/docs/user-tools-databricks-aws.md b/user_tools/docs/user-tools-databricks-aws.md
index 8e94e654d..2e9198af4 100644
--- a/user_tools/docs/user-tools-databricks-aws.md
+++ b/user_tools/docs/user-tools-databricks-aws.md
@@ -43,7 +43,7 @@ Before running any command, you can set environment variables to specify configu
- RAPIDS variables have a naming pattern `RAPIDS_USER_TOOLS_*`:
- `RAPIDS_USER_TOOLS_CACHE_FOLDER`: specifies the location of a local directory that the RAPIDS-cli uses to store and cache the downloaded resources. The default is `/var/tmp/spark_rapids_user_tools_cache`. Note that caching the resources locally has an impact on the total execution time of the command.
- `RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY`: specifies the location of a local directory that the RAPIDS-cli uses to generate the output. The wrapper CLI arguments override that environment variable (`--local_folder` for Qualification).
-- For Databricks CLI, some environment variables can be set and picked by the RAPIDS-user tools such as: `DATABRICKS_CONFIG_FILE`, `DATABRICKS_HOST` and `DATABRICKS_TOKEN`. See the description of the variables in [Environment variables](https://docs.databricks.com/en/dev-tools/auth.html#environment-variables-and-fields-for-client-unified-authentication).
+- For Databricks CLI, some environment variables can be set and picked by the RAPIDS-user tools such as: `DATABRICKS_CONFIG_FILE`, `DATABRICKS_HOST` and `DATABRICKS_TOKEN`. See the description of the variables in [Environment variables](https://docs.databricks.com/en/dev-tools/auth/index.html#environment-variables-and-fields-for-client-unified-authentication).
- For AWS CLI, some environment variables can be set and picked by the RAPIDS-user tools such as: `AWS_SHARED_CREDENTIALS_FILE`, `AWS_CONFIG_FILE`, `AWS_REGION`, `AWS_DEFAULT_REGION`, `AWS_PROFILE` and `AWS_DEFAULT_OUTPUT`. See the full list of variables in [aws-cli-configure-envvars](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html).
## Qualification command
diff --git a/user_tools/docs/user-tools-databricks-azure.md b/user_tools/docs/user-tools-databricks-azure.md
index 2605b70e8..96cf6888e 100644
--- a/user_tools/docs/user-tools-databricks-azure.md
+++ b/user_tools/docs/user-tools-databricks-azure.md
@@ -47,7 +47,7 @@ Before running any command, you can set environment variables to specify configu
- RAPIDS variables have a naming pattern `RAPIDS_USER_TOOLS_*`:
- `RAPIDS_USER_TOOLS_CACHE_FOLDER`: specifies the location of a local directory that the RAPIDS-cli uses to store and cache the downloaded resources. The default is `/var/tmp/spark_rapids_user_tools_cache`. Note that caching the resources locally has an impact on the total execution time of the command.
- `RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY`: specifies the location of a local directory that the RAPIDS-cli uses to generate the output. The wrapper CLI arguments override that environment variable (`--local_folder` for Qualification).
-- For Databricks CLI, some environment variables can be set and picked up by the RAPIDS-user tools such as: `DATABRICKS_CONFIG_FILE`, `DATABRICKS_HOST` and `DATABRICKS_TOKEN`. See the description of the variables in [Environment variables](https://docs.databricks.com/en/dev-tools/auth.html#environment-variables-and-fields-for-client-unified-authentication).
+- For Databricks CLI, some environment variables can be set and picked up by the RAPIDS-user tools such as: `DATABRICKS_CONFIG_FILE`, `DATABRICKS_HOST` and `DATABRICKS_TOKEN`. See the description of the variables in [Environment variables](https://docs.databricks.com/en/dev-tools/auth/index.html#environment-variables-and-fields-for-client-unified-authentication).
- For Azure CLI, some environment variables can be set and picked up by the RAPIDS-user tools such as: `AZURE_CONFIG_FILE` and `AZURE_DEFAULTS_LOCATION`.
## Qualification command
diff --git a/user_tools/pyproject.toml b/user_tools/pyproject.toml
index 803708c94..a9797e3b2 100644
--- a/user_tools/pyproject.toml
+++ b/user_tools/pyproject.toml
@@ -37,7 +37,7 @@ dependencies = [
"pylint-pydantic==0.3.0",
# used for common API to access remote filesystems like local/s3/gcs/hdfs
# this will include numpy
- "pyarrow==12.0.1",
+ "pyarrow==14.0.1",
# used for ADLS filesystem implementation
# Issue-568: use 12.17.0 as the new 12.18.0 causes an error in runtime
"azure-storage-blob==12.17.0",
@@ -49,7 +49,7 @@ dynamic=["entry-points", "version"]
[project.scripts]
spark_rapids_user_tools = "spark_rapids_pytools.wrapper:main"
-ascli = "spark_rapids_tools.cmdli.tools_cli:main"
+spark_rapids = "spark_rapids_tools.cmdli.tools_cli:main"
[tool.setuptools]
package-dir = {"" = "src"}
diff --git a/user_tools/src/spark_rapids_pytools/__init__.py b/user_tools/src/spark_rapids_pytools/__init__.py
index 8672e1571..ecf7a8978 100644
--- a/user_tools/src/spark_rapids_pytools/__init__.py
+++ b/user_tools/src/spark_rapids_pytools/__init__.py
@@ -16,5 +16,5 @@
from spark_rapids_pytools.build import get_version
-VERSION = '23.10.1'
+VERSION = '23.10.2'
__version__ = get_version(VERSION)
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py b/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
index c2fc5f19b..7f6b5abe8 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
@@ -70,9 +70,8 @@ def get_platform_name(self) -> str:
This is used to get the lower-case name of the runtime platform.
:return: the name of the runtime platform in lower case.
"""
- if self.platform is not None:
- if self.platform == 'dataproc':
- self_id = CspEnv.DATAPROC
+ if self.platform is not None and self.platform == 'dataproc':
+ self_id = CspEnv.DATAPROC
else:
self_id = self.type_id
return CspEnv.pretty_print(self_id)
diff --git a/user_tools/src/spark_rapids_pytools/common/utilities.py b/user_tools/src/spark_rapids_pytools/common/utilities.py
index fb2666cd7..5ac5cdd97 100644
--- a/user_tools/src/spark_rapids_pytools/common/utilities.py
+++ b/user_tools/src/spark_rapids_pytools/common/utilities.py
@@ -45,6 +45,7 @@
class Utils:
"""Utility class used to enclose common helpers and utilities."""
+ warning_issued = False  # ensures the short-flag usage warning is printed only once
@classmethod
def gen_random_string(cls, str_length: int) -> str:
@@ -208,6 +209,26 @@ def gen_multiline_str(cls, *items) -> str:
def get_os_name(cls) -> str:
return os.uname().sysname
+ @classmethod
+ def get_value_or_pop(cls, provided_value, options_dict, short_flag, default_value=None):
+ """
+ Returns the provided value if it is not None; otherwise pops and returns the short-flag entry from the options dictionary.
+
+ :param provided_value: The value to return if not None.
+ :param options_dict: Dictionary containing options.
+ :param short_flag: Flag to look for in options_dict.
+ :param default_value: The default value to return if short_flag is not found in options_dict. Defaults to None.
+ :return: provided_value or the value from options_dict or the default_value.
+ """
+ if provided_value is not None:
+ return provided_value
+ if short_flag in options_dict:
+ if not cls.warning_issued:
+ cls.warning_issued = True
+ print('Warning: Instead of using short flags for arguments, consider providing the values directly.')
+ return options_dict.pop(short_flag)
+ return default_value
+
class ToolLogging:
"""Holds global utilities used for logging."""
diff --git a/user_tools/src/spark_rapids_pytools/rapids/profiling.py b/user_tools/src/spark_rapids_pytools/rapids/profiling.py
index 0c6e1cfcc..2c31af406 100644
--- a/user_tools/src/spark_rapids_pytools/rapids/profiling.py
+++ b/user_tools/src/spark_rapids_pytools/rapids/profiling.py
@@ -247,7 +247,7 @@ def _process_output(self):
self.__generate_report_with_recommendations()
def _init_rapids_arg_list(self) -> List[str]:
- return self._create_autotuner_rapids_args()
+ return super()._init_rapids_arg_list() + self._create_autotuner_rapids_args()
@dataclass
diff --git a/user_tools/src/spark_rapids_pytools/rapids/qualification.py b/user_tools/src/spark_rapids_pytools/rapids/qualification.py
index 7b5ed96b1..7ed1d3cff 100644
--- a/user_tools/src/spark_rapids_pytools/rapids/qualification.py
+++ b/user_tools/src/spark_rapids_pytools/rapids/qualification.py
@@ -770,10 +770,6 @@ def _write_summary(self):
if wrapper_out_content is not None:
print(Utils.gen_multiline_str(wrapper_out_content))
- def _init_rapids_arg_list(self) -> List[str]:
- # TODO: Make sure we add this argument only for jar versions 23.02+
- return ['--platform', self.ctxt.platform.get_platform_name().replace('_', '-')]
-
def _generate_section_lines(self, sec_conf: dict) -> List[str]:
# TODO: we may like to show the scripts even when the gpu-cluster is not defined
# this requires that we allow to generate the script without the gpu-cluster
diff --git a/user_tools/src/spark_rapids_pytools/rapids/rapids_tool.py b/user_tools/src/spark_rapids_pytools/rapids/rapids_tool.py
index 7fbbd9b09..41214f65c 100644
--- a/user_tools/src/spark_rapids_pytools/rapids/rapids_tool.py
+++ b/user_tools/src/spark_rapids_pytools/rapids/rapids_tool.py
@@ -756,7 +756,8 @@ def _process_local_job_submission_args(self):
self.ctxt.update_job_args(job_args)
def _init_rapids_arg_list(self) -> List[str]:
- return []
+ # TODO: Make sure we add this argument only for jar versions 23.02+
+ return ['--platform', self.ctxt.platform.get_platform_name().replace('_', '-')]
@timeit('Building Job Arguments and Executing Job CMD') # pylint: disable=too-many-function-args
def _prepare_local_job_arguments(self):
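The net effect of moving the `--platform` argument into the base `_init_rapids_arg_list` is that every tool inherits it, Profiling appends its autotuner arguments on top, and Qualification no longer needs its own override. A simplified, self-contained sketch (class names and the autotuner flag are stand-ins, not the real implementations):

from typing import List

class BaseTool:                                   # stands in for RapidsTool
    platform_name = 'dataproc'                    # hypothetical resolved platform name
    def _init_rapids_arg_list(self) -> List[str]:
        return ['--platform', self.platform_name]

class Profiling(BaseTool):                        # stands in for the profiling tool
    def _create_autotuner_rapids_args(self) -> List[str]:
        return ['--auto-tuner-args']              # placeholder for the real autotuner arguments
    def _init_rapids_arg_list(self) -> List[str]:
        return super()._init_rapids_arg_list() + self._create_autotuner_rapids_args()

class Qualification(BaseTool):                    # no override needed any more
    pass

assert Profiling()._init_rapids_arg_list() == ['--platform', 'dataproc', '--auto-tuner-args']
assert Qualification()._init_rapids_arg_list() == ['--platform', 'dataproc']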
diff --git a/user_tools/src/spark_rapids_pytools/wrappers/databricks_aws_wrapper.py b/user_tools/src/spark_rapids_pytools/wrappers/databricks_aws_wrapper.py
index 8411eb6db..a62ad9286 100644
--- a/user_tools/src/spark_rapids_pytools/wrappers/databricks_aws_wrapper.py
+++ b/user_tools/src/spark_rapids_pytools/wrappers/databricks_aws_wrapper.py
@@ -16,7 +16,7 @@
"""Wrapper class to run tools associated with RAPIDS Accelerator for Apache Spark plugin on DATABRICKS_AWS."""
from spark_rapids_tools import CspEnv
from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
from spark_rapids_pytools.rapids.diagnostic import Diagnostic
from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType
@@ -40,8 +40,8 @@ def qualification(cpu_cluster: str = None,
filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
- jvm_heap_size: int = 24,
- verbose: bool = False,
+ jvm_heap_size: int = None,
+ verbose: bool = None,
cpu_discount: int = None,
gpu_discount: int = None,
global_discount: int = None,
@@ -105,6 +105,15 @@ def qualification(cpu_cluster: str = None,
For more details on Qualification tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
"""
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+ profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+ aws_profile = Utils.get_value_or_pop(aws_profile, rapids_options, 'a')
+ remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+ jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+ eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+ filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+ tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+ local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# when debug is set to true set it in the environment.
ToolLogging.enable_debug_mode()
@@ -150,8 +159,8 @@ def profiling(gpu_cluster: str = None,
remote_folder: str = None,
tools_jar: str = None,
credentials_file: str = None,
- jvm_heap_size: int = 24,
- verbose: bool = False,
+ jvm_heap_size: int = None,
+ verbose: bool = None,
**rapids_options) -> None:
"""
The Profiling tool analyzes both CPU or GPU generated event logs and generates information
@@ -192,6 +201,17 @@ def profiling(gpu_cluster: str = None,
For more details on Profiling tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
"""
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+ profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+ aws_profile = Utils.get_value_or_pop(aws_profile, rapids_options, 'a')
+ credentials_file = Utils.get_value_or_pop(credentials_file, rapids_options, 'c')
+ gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
+ remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+ jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+ eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+ tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+ worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+ local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# when debug is set to true set it in the environment.
ToolLogging.enable_debug_mode()
diff --git a/user_tools/src/spark_rapids_pytools/wrappers/databricks_azure_wrapper.py b/user_tools/src/spark_rapids_pytools/wrappers/databricks_azure_wrapper.py
index 197c42a04..f29582f20 100644
--- a/user_tools/src/spark_rapids_pytools/wrappers/databricks_azure_wrapper.py
+++ b/user_tools/src/spark_rapids_pytools/wrappers/databricks_azure_wrapper.py
@@ -16,7 +16,7 @@
"""Wrapper class to run tools associated with RAPIDS Accelerator for Apache Spark plugin on DATABRICKS_AZURE."""
from spark_rapids_tools import CspEnv
from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
from spark_rapids_pytools.rapids.diagnostic import Diagnostic
from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType
@@ -39,8 +39,8 @@ def qualification(cpu_cluster: str = None,
filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
- jvm_heap_size: int = 24,
- verbose: bool = False,
+ jvm_heap_size: int = None,
+ verbose: bool = None,
cpu_discount: int = None,
gpu_discount: int = None,
global_discount: int = None,
@@ -103,6 +103,14 @@ def qualification(cpu_cluster: str = None,
For more details on Qualification tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
"""
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+ profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+ remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+ jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+ eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+ filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+ tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+ local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# when debug is set to true set it in the environment.
ToolLogging.enable_debug_mode()
@@ -146,8 +154,8 @@ def profiling(gpu_cluster: str = None,
remote_folder: str = None,
tools_jar: str = None,
credentials_file: str = None,
- jvm_heap_size: int = 24,
- verbose: bool = False,
+ jvm_heap_size: int = None,
+ verbose: bool = None,
**rapids_options) -> None:
"""
The Profiling tool analyzes both CPU or GPU generated event logs and generates information
@@ -186,6 +194,16 @@ def profiling(gpu_cluster: str = None,
For more details on Profiling tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
"""
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+ profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+ credentials_file = Utils.get_value_or_pop(credentials_file, rapids_options, 'c')
+ gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
+ remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+ jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+ eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+ tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+ worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+ local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# when debug is set to true set it in the environment.
ToolLogging.enable_debug_mode()
diff --git a/user_tools/src/spark_rapids_pytools/wrappers/dataproc_gke_wrapper.py b/user_tools/src/spark_rapids_pytools/wrappers/dataproc_gke_wrapper.py
index 251347a28..23868aa77 100644
--- a/user_tools/src/spark_rapids_pytools/wrappers/dataproc_gke_wrapper.py
+++ b/user_tools/src/spark_rapids_pytools/wrappers/dataproc_gke_wrapper.py
@@ -16,7 +16,7 @@
from spark_rapids_tools import CspEnv
from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType
@@ -36,8 +36,8 @@ def qualification(cpu_cluster: str = None,
filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
- jvm_heap_size: int = 24,
- verbose: bool = False,
+ jvm_heap_size: int = None,
+ verbose: bool = None,
cpu_discount: int = None,
gpu_discount: int = None,
global_discount: int = None,
@@ -100,6 +100,13 @@ def qualification(cpu_cluster: str = None,
For more details on Qualification tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
"""
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+ remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+ jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+ eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+ filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+ tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+ local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# when debug is set to true set it in the environment.
ToolLogging.enable_debug_mode()
diff --git a/user_tools/src/spark_rapids_pytools/wrappers/dataproc_wrapper.py b/user_tools/src/spark_rapids_pytools/wrappers/dataproc_wrapper.py
index a02fee408..8736d9cd2 100644
--- a/user_tools/src/spark_rapids_pytools/wrappers/dataproc_wrapper.py
+++ b/user_tools/src/spark_rapids_pytools/wrappers/dataproc_wrapper.py
@@ -16,7 +16,7 @@
from spark_rapids_tools import CspEnv
from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
from spark_rapids_pytools.rapids.bootstrap import Bootstrap
from spark_rapids_pytools.rapids.diagnostic import Diagnostic
from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
@@ -39,8 +39,8 @@ def qualification(cpu_cluster: str = None,
filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
- jvm_heap_size: int = 24,
- verbose: bool = False,
+ jvm_heap_size: int = None,
+ verbose: bool = None,
cpu_discount: int = None,
gpu_discount: int = None,
global_discount: int = None,
@@ -102,6 +102,13 @@ def qualification(cpu_cluster: str = None,
For more details on Qualification tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
"""
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+ remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+ jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+ eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+ filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+ tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+ local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# when debug is set to true set it in the environment.
ToolLogging.enable_debug_mode()
@@ -143,8 +150,8 @@ def profiling(gpu_cluster: str = None,
remote_folder: str = None,
tools_jar: str = None,
credentials_file: str = None,
- jvm_heap_size: int = 24,
- verbose: bool = False,
+ jvm_heap_size: int = None,
+ verbose: bool = None,
**rapids_options) -> None:
"""
The Profiling tool analyzes both CPU or GPU generated event logs and generates information
@@ -183,6 +190,15 @@ def profiling(gpu_cluster: str = None,
For more details on Profiling tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
"""
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+ credentials_file = Utils.get_value_or_pop(credentials_file, rapids_options, 'c')
+ gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
+ remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+ jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+ eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+ tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+ worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+ local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# when debug is set to true set it in the environment.
ToolLogging.enable_debug_mode()
diff --git a/user_tools/src/spark_rapids_pytools/wrappers/emr_wrapper.py b/user_tools/src/spark_rapids_pytools/wrappers/emr_wrapper.py
index fea22b044..9cad16338 100644
--- a/user_tools/src/spark_rapids_pytools/wrappers/emr_wrapper.py
+++ b/user_tools/src/spark_rapids_pytools/wrappers/emr_wrapper.py
@@ -16,7 +16,7 @@
"""Wrapper class to run tools associated with RAPIDS Accelerator for Apache Spark plugin on AWS-EMR."""
from spark_rapids_tools import CspEnv
from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
from spark_rapids_pytools.rapids.bootstrap import Bootstrap
from spark_rapids_pytools.rapids.diagnostic import Diagnostic
from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, \
@@ -40,8 +40,8 @@ def qualification(cpu_cluster: str = None,
filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
- jvm_heap_size: int = 24,
- verbose: bool = False,
+ jvm_heap_size: int = None,
+ verbose: bool = None,
cpu_discount: int = None,
gpu_discount: int = None,
global_discount: int = None,
@@ -100,6 +100,14 @@ def qualification(cpu_cluster: str = None,
For more details on Qualification tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
"""
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+ profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+ remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+ jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+ eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+ filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+ tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+ local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# when debug is set to true set it in the environment.
ToolLogging.enable_debug_mode()
@@ -140,8 +148,8 @@ def profiling(gpu_cluster: str = None,
local_folder: str = None,
remote_folder: str = None,
tools_jar: str = None,
- jvm_heap_size: int = 24,
- verbose: bool = False,
+ jvm_heap_size: int = None,
+ verbose: bool = None,
**rapids_options) -> None:
"""
The Profiling tool analyzes both CPU or GPU generated event logs and generates information
@@ -177,6 +185,15 @@ def profiling(gpu_cluster: str = None,
For more details on Profiling tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
"""
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+ profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+ gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
+ remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+ jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+ eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+ tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+ worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+ local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# when debug is set to true set it in the environment.
ToolLogging.enable_debug_mode()
diff --git a/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py b/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
index 4d50b0c28..048f3582a 100644
--- a/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
+++ b/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
@@ -16,7 +16,7 @@
"""Wrapper class to run tools associated with RAPIDS Accelerator for Apache Spark plugin on On-Prem cluster."""
from spark_rapids_tools import CspEnv
from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType
@@ -36,8 +36,8 @@ def qualification(cpu_cluster: str = None,
target_platform: str = None,
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
- jvm_heap_size: int = 24,
- verbose: bool = False,
+ jvm_heap_size: int = None,
+ verbose: bool = None,
cpu_discount: int = None,
gpu_discount: int = None,
global_discount: int = None,
@@ -80,6 +80,11 @@ def qualification(cpu_cluster: str = None,
For more details on Qualification tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
"""
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+ jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+ eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+ filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+ local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# when debug is set to true set it in the environment.
ToolLogging.enable_debug_mode()
@@ -132,8 +137,8 @@ def profiling(worker_info: str = None,
eventlogs: str = None,
local_folder: str = None,
tools_jar: str = None,
- jvm_heap_size: int = 24,
- verbose: bool = False,
+ jvm_heap_size: int = None,
+ verbose: bool = None,
**rapids_options) -> None:
"""
The Profiling tool analyzes both CPU or GPU generated event logs and generates information
@@ -158,7 +163,12 @@ def profiling(worker_info: str = None,
For more details on Profiling tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
"""
-
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+ jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+ eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+ tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+ worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+ local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# when debug is set to true set it in the environment.
ToolLogging.enable_debug_mode()
diff --git a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py
index 67624ac7a..6605ecb3b 100644
--- a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py
+++ b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py
@@ -168,11 +168,21 @@ def detect_platform_from_eventlogs_prefix(self):
self.p_args['toolArgs']['platform'] = map_storage_to_platform[storage_type]
def validate_onprem_with_cluster_name(self):
- if self.platform == CspEnv.ONPREM:
+ # this field has already been populated during initialization
+ selected_platform = self.p_args['toolArgs']['platform']
+ if selected_platform == CspEnv.ONPREM:
raise PydanticCustomError(
'invalid_argument',
f'Cannot run cluster by name with platform [{CspEnv.ONPREM}]\n Error:')
+ def validate_onprem_with_cluster_props_without_eventlogs(self):
+ # this field has already been populated during initialization
+ selected_platform = self.p_args['toolArgs']['platform']
+ if selected_platform == CspEnv.ONPREM:
+ raise PydanticCustomError(
+ 'invalid_argument',
+ f'Cannot run cluster by properties with platform [{CspEnv.ONPREM}] without event logs\n Error:')
+
def init_extra_arg_cases(self) -> list:
return []
@@ -202,21 +212,24 @@ def define_extra_arg_cases(self):
def build_tools_args(self) -> dict:
pass
- def apply_arg_cases(self):
- for curr_cases in [self.rejected, self.detected, self.extra]:
+ def apply_arg_cases(self, cases_list: list):
+ for curr_cases in cases_list:
for case_key, case_value in curr_cases.items():
if any(ArgValueCase.array_equal(self.argv_cases, case_i) for case_i in case_value['cases']):
# debug the case key
self.logger.info('...applying argument case: %s', case_key)
case_value['callable']()
+ def apply_all_arg_cases(self):
+ self.apply_arg_cases([self.rejected, self.detected, self.extra])
+
def validate_arguments(self):
self.init_tool_args()
self.init_arg_cases()
self.define_invalid_arg_cases()
self.define_detection_cases()
self.define_extra_arg_cases()
- self.apply_arg_cases()
+ self.apply_all_arg_cases()
def get_or_set_platform(self) -> CspEnv:
if self.p_args['toolArgs']['platform'] is None:
@@ -224,17 +237,14 @@ def get_or_set_platform(self) -> CspEnv:
runtime_platform = CspEnv.get_default()
else:
runtime_platform = self.p_args['toolArgs']['platform']
- self.post_platform_assignment_validation(runtime_platform)
+ self.post_platform_assignment_validation()
return runtime_platform
- def post_platform_assignment_validation(self, assigned_platform):
- # do some validation after we decide the cluster type
- if self.argv_cases[1] == ArgValueCase.VALUE_A:
- if assigned_platform == CspEnv.ONPREM:
- # it is not allowed to run cluster_by_name on an OnPrem platform
- raise PydanticCustomError(
- 'invalid_argument',
- f'Cannot run cluster by name with platform [{CspEnv.ONPREM}]\n Error:')
+ def post_platform_assignment_validation(self):
+ # Update argv_cases to reflect the platform
+ self.argv_cases[0] = ArgValueCase.VALUE_A
+ # Any validation post platform assignment should be done here
+ self.apply_arg_cases([self.rejected, self.extra])
@dataclass
@@ -278,6 +288,13 @@ def define_invalid_arg_cases(self):
[ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.IGNORE]
]
}
+ self.rejected['Cluster By Properties Cannot go with OnPrem'] = {
+ 'valid': False,
+ 'callable': partial(self.validate_onprem_with_cluster_props_without_eventlogs),
+ 'cases': [
+ [ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED]
+ ]
+ }
def define_detection_cases(self):
self.detected['Define Platform from Cluster Properties file'] = {
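A simplified, standalone sketch of the case-table mechanism that `apply_arg_cases` walks: each entry pairs a callback with the `[platform, cluster, eventlogs]` triplets that should trigger it, and `IGNORE` is assumed to act as a wildcard in `ArgValueCase.array_equal` (the names below are illustrative, not the real classes):

from enum import Enum, auto

class Case(Enum):                 # stands in for ArgValueCase
    UNDEFINED = auto()
    VALUE_A = auto()
    VALUE_B = auto()
    IGNORE = auto()               # assumed wildcard semantics

def array_equal(actual, expected):
    # element-wise comparison where IGNORE in the expected triplet matches anything
    return all(e == Case.IGNORE or a == e for a, e in zip(actual, expected))

def reject_onprem_cluster_by_name():
    raise ValueError('Cannot run cluster by name with platform [onprem]')

rejected = {
    'Cluster By Name Cannot go with OnPrem': {
        'callable': reject_onprem_cluster_by_name,
        'cases': [[Case.VALUE_A, Case.VALUE_A, Case.IGNORE]],
    },
}

def apply_arg_cases(argv_cases, tables):
    for table in tables:
        for name, entry in table.items():
            if any(array_equal(argv_cases, case) for case in entry['cases']):
                print(f'...applying argument case: {name}')
                entry['callable']()

# After the platform has been resolved, argv_cases[0] is forced to VALUE_A and only the
# rejected/extra tables are replayed, mirroring post_platform_assignment_validation().
apply_arg_cases([Case.VALUE_A, Case.UNDEFINED, Case.UNDEFINED], [rejected])   # no match, nothing raised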
diff --git a/user_tools/src/spark_rapids_tools/cmdli/tools_cli.py b/user_tools/src/spark_rapids_tools/cmdli/tools_cli.py
index 238f83a89..5a63aebe5 100644
--- a/user_tools/src/spark_rapids_tools/cmdli/tools_cli.py
+++ b/user_tools/src/spark_rapids_tools/cmdli/tools_cli.py
@@ -19,7 +19,7 @@
from spark_rapids_tools.enums import QualGpuClusterReshapeType
from spark_rapids_tools.utils.util import gen_app_banner, init_environment
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
from spark_rapids_pytools.rapids.bootstrap import Bootstrap
from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
from spark_rapids_pytools.rapids.qualification import QualificationAsLocal
@@ -48,7 +48,7 @@ def qualification(self,
global_discount: int = None,
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
- verbose: bool = False,
+ verbose: bool = None,
**rapids_options):
"""The Qualification cmd provides estimated running costs and speedups by migrating Apache
Spark applications to GPU accelerated clusters.
@@ -105,6 +105,11 @@ def qualification(self,
For more details on Qualification tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
"""
+ platform = Utils.get_value_or_pop(platform, rapids_options, 'p')
+ target_platform = Utils.get_value_or_pop(target_platform, rapids_options, 't')
+ output_folder = Utils.get_value_or_pop(output_folder, rapids_options, 'o')
+ filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
if verbose:
ToolLogging.enable_debug_mode()
init_environment('qual')
@@ -133,7 +138,7 @@ def profiling(self,
cluster: str = None,
platform: str = None,
output_folder: str = None,
- verbose: bool = False,
+ verbose: bool = None,
**rapids_options):
"""The Profiling cmd provides information which can be used for debugging and profiling
Apache Spark applications running on accelerated GPU cluster.
@@ -159,6 +164,11 @@ def profiling(self,
For more details on Profiling tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
"""
+ eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+ cluster = Utils.get_value_or_pop(cluster, rapids_options, 'c')
+ platform = Utils.get_value_or_pop(platform, rapids_options, 'p')
+ output_folder = Utils.get_value_or_pop(output_folder, rapids_options, 'o')
+ verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
if verbose:
ToolLogging.enable_debug_mode()
init_environment('prof')
diff --git a/user_tools/src/spark_rapids_tools/utils/util.py b/user_tools/src/spark_rapids_tools/utils/util.py
index e8ed7e05d..8ff5a1975 100644
--- a/user_tools/src/spark_rapids_tools/utils/util.py
+++ b/user_tools/src/spark_rapids_tools/utils/util.py
@@ -91,7 +91,7 @@ def to_snake_case(word: str) -> str:
def dump_tool_usage(tool_name: Optional[str], raise_sys_exit: Optional[bool] = True):
imported_module = __import__('spark_rapids_tools.cmdli', globals(), locals(), ['ToolsCLI'])
wrapper_clzz = getattr(imported_module, 'ToolsCLI')
- help_name = 'ascli'
+ help_name = 'spark_rapids'
usage_cmd = f'{tool_name} -- --help'
try:
fire.Fire(wrapper_clzz(), name=help_name, command=usage_cmd)
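With the console script renamed from `ascli` to `spark_rapids`, the usage dump above renders help text under the new program name. A minimal sketch of the same Fire call in isolation (the sub-command name here is just an example):

import fire
from spark_rapids_tools.cmdli import ToolsCLI

try:
    # Renders the help text as "spark_rapids qualification -- --help" would.
    fire.Fire(ToolsCLI(), name='spark_rapids', command='qualification -- --help')
except SystemExit:
    pass  # Fire exits after printing the usage text, which dump_tool_usage also guards against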
diff --git a/user_tools/tests/spark_rapids_tools_ut/conftest.py b/user_tools/tests/spark_rapids_tools_ut/conftest.py
index e29cefc3c..145355f24 100644
--- a/user_tools/tests/spark_rapids_tools_ut/conftest.py
+++ b/user_tools/tests/spark_rapids_tools_ut/conftest.py
@@ -16,7 +16,7 @@
import sys
-import pytest # pylint: disable=import-error
+import pytest # pylint: disable=import-error
def get_test_resources_path():
@@ -46,9 +46,10 @@ def gen_cpu_cluster_props():
# all csps except onprem
csps = ['dataproc', 'dataproc_gke', 'emr', 'databricks_aws', 'databricks_azure']
all_csps = csps + ['onprem']
+autotuner_prop_path = 'worker_info.yaml'
-class SparkRapidsToolsUT: # pylint: disable=too-few-public-methods
+class SparkRapidsToolsUT: # pylint: disable=too-few-public-methods
@pytest.fixture(autouse=True)
def get_ut_data_dir(self):
diff --git a/user_tools/tests/spark_rapids_tools_ut/resources/worker_info.yaml b/user_tools/tests/spark_rapids_tools_ut/resources/worker_info.yaml
new file mode 100644
index 000000000..d9aaa14d5
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_ut/resources/worker_info.yaml
@@ -0,0 +1,19 @@
+system:
+ numCores: 32
+ memory: 212992MiB
+ numWorkers: 5
+gpu:
+ memory: 15109MiB
+ count: 4
+ name: T4
+softwareProperties:
+ spark.driver.maxResultSize: 7680m
+ spark.driver.memory: 15360m
+ spark.executor.cores: '8'
+ spark.executor.instances: '2'
+ spark.executor.memory: 47222m
+ spark.executorEnv.OPENBLAS_NUM_THREADS: '1'
+ spark.scheduler.mode: FAIR
+ spark.sql.cbo.enabled: 'true'
+ spark.ui.port: '0'
+ spark.yarn.am.memory: 640m
diff --git a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
index 0e5d496d0..76e694c81 100644
--- a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
+++ b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
@@ -15,16 +15,16 @@
"""Test Tool argument validators"""
import dataclasses
+import warnings
from collections import defaultdict
from typing import Dict, Callable, List
-import fire
import pytest # pylint: disable=import-error
from spark_rapids_tools import CspEnv
from spark_rapids_tools.cmdli.argprocessor import AbsToolUserArgModel, ArgValueCase
from spark_rapids_tools.enums import QualFilterApp
-from .conftest import SparkRapidsToolsUT, all_cpu_cluster_props, csp_cpu_cluster_props, csps
+from .conftest import SparkRapidsToolsUT, autotuner_prop_path, all_cpu_cluster_props, all_csps
@dataclasses.dataclass
@@ -52,6 +52,7 @@ def decorator(func_cb: Callable):
triplet_test_registry[obj_k] = argv_obj
argv_obj.tests.append(func_cb.__name__)
return func_cb
+
return decorator
@@ -74,112 +75,238 @@ def validate_args_w_savings_disabled(tool_name: str, t_args: dict):
# filterApps should be set to savings
assert t_args['filterApps'] == QualFilterApp.SPEEDUPS
- @pytest.mark.parametrize('tool_name', ['qualification', 'profiling', 'bootstrap'])
- @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED])
- def test_no_args(self, tool_name):
- fire.core.Display = lambda lines, out: out.write('\n'.join(lines) + '\n')
- with pytest.raises(SystemExit) as pytest_wrapped_e:
- AbsToolUserArgModel.create_tool_args(tool_name)
- assert pytest_wrapped_e.type == SystemExit
+ @staticmethod
+ def create_tool_args_should_pass(tool_name: str, platform=None, cluster=None, eventlogs=None):
+ return AbsToolUserArgModel.create_tool_args(tool_name,
+ platform=platform,
+ cluster=cluster,
+ eventlogs=eventlogs)
- @pytest.mark.parametrize('tool_name', ['qualification', 'profiling', 'bootstrap'])
- @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED])
- def test_cluster__name_no_hints(self, tool_name):
- fire.core.Display = lambda lines, out: out.write('\n'.join(lines) + '\n')
+ @staticmethod
+ def create_tool_args_should_fail(tool_name: str, platform=None, cluster=None, eventlogs=None):
with pytest.raises(SystemExit) as pytest_wrapped_e:
- AbsToolUserArgModel.create_tool_args(tool_name, cluster='mycluster')
+ AbsToolUserArgModel.create_tool_args(tool_name,
+ platform=platform,
+ cluster=cluster,
+ eventlogs=eventlogs)
assert pytest_wrapped_e.type == SystemExit
- @pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
- @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props)
- @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A])
- def test_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path):
- cluster_prop_file = f'{get_ut_data_dir}/{prop_path}'
- tool_args = AbsToolUserArgModel.create_tool_args(tool_name,
- cluster=f'{cluster_prop_file}',
- eventlogs=f'{get_ut_data_dir}/eventlogs')
- assert tool_args['runtimePlatform'] == CspEnv(csp)
- # for qualification, passing the cluster properties should be enabled unless it is
- # onprem platform that requires target_platform
- if CspEnv(csp) != CspEnv.ONPREM:
- self.validate_args_w_savings_enabled(tool_name, tool_args)
+ @staticmethod
+ def validate_tool_args(tool_name: str, tool_args: dict, cost_savings_enabled, expected_platform):
+ assert tool_args['runtimePlatform'] == CspEnv(expected_platform)
+ if cost_savings_enabled:
+ TestToolArgProcessor.validate_args_w_savings_enabled(tool_name, tool_args)
else:
- self.validate_args_w_savings_disabled(tool_name, tool_args)
+ TestToolArgProcessor.validate_args_w_savings_disabled(tool_name, tool_args)
@pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
- @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A])
- def test_no_cluster_props(self, get_ut_data_dir, tool_name):
- # all eventlogs are stored on local path. There is no way to find which cluster
- # we refer to.
- tool_args = AbsToolUserArgModel.create_tool_args(tool_name,
- eventlogs=f'{get_ut_data_dir}/eventlogs')
- assert tool_args['runtimePlatform'] == CspEnv.ONPREM
- # for qualification, cost savings should be disabled
- self.validate_args_w_savings_disabled(tool_name, tool_args)
+ @pytest.mark.parametrize('csp', all_csps)
+ @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED])
+ @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED])
+ def test_with_platform(self, tool_name, csp):
+ # should fail: platform provided; cannot run with platform only
+ self.create_tool_args_should_fail(tool_name, platform=csp)
+
+ # should fail: platform not provided; cannot run with no args
+ self.create_tool_args_should_fail(tool_name=tool_name)
@pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
- @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A])
- @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.IGNORE])
- def test_onprem_disallow_cluster_by_name(self, get_ut_data_dir, tool_name):
- # onprem platform cannot run when the cluster is by_name
- with pytest.raises(SystemExit) as pytest_exit_e:
- AbsToolUserArgModel.create_tool_args(tool_name,
- cluster='my_cluster',
- eventlogs=f'{get_ut_data_dir}/eventlogs')
- assert pytest_exit_e.type == SystemExit
- with pytest.raises(SystemExit) as pytest_wrapped_e:
- AbsToolUserArgModel.create_tool_args(tool_name,
- platform='onprem',
- cluster='my_cluster')
- assert pytest_wrapped_e.type == SystemExit
+ @pytest.mark.parametrize('csp', all_csps)
+ @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A])
+ @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A])
+ def test_with_platform_with_eventlogs(self, get_ut_data_dir, tool_name, csp):
+ # should pass: platform and event logs are provided
+ tool_args = self.create_tool_args_should_pass(tool_name,
+ platform=csp,
+ eventlogs=f'{get_ut_data_dir}/eventlogs')
+ # for qualification, cost savings should be disabled because cluster is not provided
+ self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
+ cost_savings_enabled=False,
+ expected_platform=csp)
+
+ # should pass: platform not provided; event logs are provided
+ tool_args = self.create_tool_args_should_pass(tool_name,
+ eventlogs=f'{get_ut_data_dir}/eventlogs')
+ # for qualification, cost savings should be disabled because cluster is not provided
+ self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
+ cost_savings_enabled=False,
+ expected_platform=CspEnv.ONPREM)
@pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
- @pytest.mark.parametrize('csp', csps)
+ @pytest.mark.parametrize('csp', all_csps)
+ @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A])
@register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED])
- def test_cluster_name_no_eventlogs(self, tool_name, csp):
- # Missing eventlogs should be accepted for all CSPs (except onPrem)
- # because the eventlogs can be retrieved from the cluster
- tool_args = AbsToolUserArgModel.create_tool_args(tool_name,
- platform=csp,
- cluster='my_cluster')
- assert tool_args['runtimePlatform'] == CspEnv(csp)
- self.validate_args_w_savings_enabled(tool_name, tool_args)
+ def test_with_platform_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name, csp):
+ if CspEnv(csp) != CspEnv.ONPREM:
+ # should pass: platform, cluster name and eventlogs are provided
+ tool_args = self.create_tool_args_should_pass(tool_name,
+ platform=csp,
+ cluster='my_cluster',
+ eventlogs=f'{get_ut_data_dir}/eventlogs')
+ # for qualification, cost savings should be enabled because cluster is provided
+ self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
+ cost_savings_enabled=True,
+ expected_platform=csp)
+
+ # should pass: event logs not provided; missing eventlogs should be accepted for
+ # all CSPs (except onPrem) because the event logs can be retrieved from the cluster
+ tool_args = self.create_tool_args_should_pass(tool_name,
+ platform=csp,
+ cluster='my_cluster')
+ # for qualification, cost savings should be enabled because cluster is provided
+ self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
+ cost_savings_enabled=True,
+ expected_platform=csp)
+ else:
+ # should fail: platform, cluster name and eventlogs are provided; onprem platform
+ # cannot run when the cluster is by name
+ self.create_tool_args_should_fail(tool_name,
+ platform=csp,
+ cluster='my_cluster',
+ eventlogs=f'{get_ut_data_dir}/eventlogs')
+
+ # should fail: event logs not provided; onprem platform cannot run when the cluster is by name
+ self.create_tool_args_should_fail(tool_name,
+ platform=csp,
+ cluster='my_cluster')
@pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
- @pytest.mark.parametrize('csp,prop_path', csp_cpu_cluster_props)
+ @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A])
+ @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED])
+ def test_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name):
+ # should fail: eventlogs provided; defaults platform to onprem, cannot run when the cluster is by name
+ self.create_tool_args_should_fail(tool_name,
+ cluster='my_cluster',
+ eventlogs=f'{get_ut_data_dir}/eventlogs')
+
+ # should fail: eventlogs not provided; defaults platform to onprem, cannot run when the cluster is by name
+ self.create_tool_args_should_fail(tool_name,
+ cluster='my_cluster')
+
+ @pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
+ @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props)
+ @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED])
@register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED])
- def test_cluster_props_no_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path):
- # Missing eventlogs should be accepted for all CSPs (except onPrem)
- # because the eventlogs can be retrieved from the cluster
+ def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp, prop_path):
cluster_prop_file = f'{get_ut_data_dir}/{prop_path}'
- tool_args = AbsToolUserArgModel.create_tool_args(tool_name,
- cluster=f'{cluster_prop_file}')
- assert tool_args['runtimePlatform'] == CspEnv(csp)
- self.validate_args_w_savings_enabled(tool_name, tool_args)
+ if CspEnv(csp) != CspEnv.ONPREM:
+ # should pass: platform provided; missing eventlogs should be accepted for all CSPs (except onPrem)
+ # because the eventlogs can be retrieved from the cluster properties
+ tool_args = self.create_tool_args_should_pass(tool_name,
+ platform=csp,
+ cluster=cluster_prop_file)
+ # for qualification, cost savings should be enabled because cluster is provided
+ self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
+ cost_savings_enabled=True,
+ expected_platform=csp)
+
+ # should pass: platform not provided; missing eventlogs should be accepted for all CSPs (except onPrem)
+ # because the eventlogs can be retrieved from the cluster properties
+ tool_args = self.create_tool_args_should_pass(tool_name,
+ cluster=cluster_prop_file)
+ # for qualification, cost savings should be enabled because cluster is provided
+ self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
+ cost_savings_enabled=True,
+ expected_platform=csp)
+ else:
+ # should fail: onprem platform cannot retrieve eventlogs from cluster properties
+ self.create_tool_args_should_fail(tool_name,
+ platform=csp,
+ cluster=cluster_prop_file)
+
+ # should fail: platform not provided; defaults platform to onprem, cannot retrieve eventlogs from
+ # cluster properties
+ self.create_tool_args_should_fail(tool_name,
+ cluster=cluster_prop_file)
@pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
- @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED])
- def test_cluster_props_no_eventlogs_on_prem(self, capsys, tool_name):
- # Missing eventlogs is not accepted for onPrem
- with pytest.raises(SystemExit) as pytest_wrapped_e:
- AbsToolUserArgModel.create_tool_args(tool_name,
- platform='onprem')
- assert pytest_wrapped_e.type == SystemExit
- captured = capsys.readouterr()
- # Verify there is no URL in error message except for the one from the documentation
- assert 'https://' not in captured.err or 'docs.nvidia.com' in captured.err
+ @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props)
+ @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A])
+ @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A])
+ def test_with_platform_with_cluster_props_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path):
+ # should pass: platform, cluster properties and eventlogs are provided
+ cluster_prop_file = f'{get_ut_data_dir}/{prop_path}'
+ tool_args = self.create_tool_args_should_pass(tool_name,
+ platform=csp,
+ cluster=cluster_prop_file,
+ eventlogs=f'{get_ut_data_dir}/eventlogs')
+ # for qualification, cost savings should be enabled because cluster is provided (except for onprem)
+ self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
+ cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM,
+ expected_platform=csp)
+
+ # should pass: platform not provided; cluster properties and eventlogs are provided
+ tool_args = self.create_tool_args_should_pass(tool_name,
+ cluster=cluster_prop_file,
+ eventlogs=f'{get_ut_data_dir}/eventlogs')
+ # for qualification, cost savings should be enabled because cluster is provided (except for onprem)
+ self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
+ cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM,
+ expected_platform=csp)
+
+ @pytest.mark.parametrize('tool_name', ['profiling'])
+ @pytest.mark.parametrize('csp', all_csps)
+ @pytest.mark.parametrize('prop_path', [autotuner_prop_path])
+ @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED])
+ @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED])
+ def test_with_platform_with_autotuner(self, get_ut_data_dir, tool_name, csp, prop_path):
+ # should fail: platform provided; autotuner needs eventlogs
+ autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}'
+ self.create_tool_args_should_fail(tool_name,
+ platform=csp,
+ cluster=autotuner_prop_file)
+
+ # should fail: platform not provided; autotuner needs eventlogs
+ self.create_tool_args_should_fail(tool_name,
+ cluster=autotuner_prop_file)
+
+ @pytest.mark.parametrize('tool_name', ['profiling'])
+ @pytest.mark.parametrize('csp', all_csps)
+ @pytest.mark.parametrize('prop_path', [autotuner_prop_path])
+ @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.VALUE_A])
+ @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.VALUE_A])
+ def test_with_platform_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path):
+ # should pass: platform, autotuner properties and eventlogs are provided
+ autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}'
+ tool_args = self.create_tool_args_should_pass(tool_name,
+ platform=csp,
+ cluster=autotuner_prop_file,
+ eventlogs=f'{get_ut_data_dir}/eventlogs')
+ # cost savings should be disabled for profiling
+ self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
+ cost_savings_enabled=False,
+ expected_platform=csp)
+
+ # should pass: platform not provided; autotuner properties and eventlogs are provided
+ tool_args = self.create_tool_args_should_pass(tool_name,
+ cluster=autotuner_prop_file,
+ eventlogs=f'{get_ut_data_dir}/eventlogs')
+ # cost savings should be disabled for profiling
+ self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
+ cost_savings_enabled=False,
+ expected_platform=CspEnv.ONPREM)
- @pytest.mark.skip(reason='Unit tests are not completed yet')
def test_arg_cases_coverage(self):
- args_keys = [
- [ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED],
- [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED],
- [ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.IGNORE],
- [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.IGNORE],
- [ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A],
- [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A],
- [ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]
- ]
-
- for arg_key in args_keys:
- assert str(arg_key) in triplet_test_registry
+ """
+ This test ensures that the above tests have covered all possible states of the `platform`, `cluster`,
+ and `event logs` fields.
+
+ Possible states:
+ - platform: `undefined` or `actual value`.
+ - cluster: `undefined`, `cluster name`, `cluster property file` or `auto tuner file`.
+ - event logs: `undefined` or `actual value`.
+ """
+ arg_platform_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]
+ arg_cluster_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.VALUE_C]
+ arg_eventlogs_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]
+
+ all_args_keys = [str([p, c, e]) for p in arg_platform_cases for c in arg_cluster_cases for e in
+ arg_eventlogs_cases]
+ args_covered = set(triplet_test_registry.keys())
+ args_not_covered = set(all_args_keys) - args_covered
+
+ if args_not_covered:
+ # cases not covered
+ args_not_covered_str = '\n'.join(args_not_covered)
+ warnings.warn(f'Cases not covered:\n{args_not_covered_str}')
+ warnings.warn(f'Coverage of all argument cases: {len(args_covered)}/{len(all_args_keys)}')
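For reference, the key space enumerated by the test is the Cartesian product of 2 platform states, 4 cluster states, and 2 event-log states, i.e. 16 triplets. An equivalent sketch using itertools (same enum values as imported in this test module):

from itertools import product
from spark_rapids_tools.cmdli.argprocessor import ArgValueCase

platform_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]
cluster_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.VALUE_C]
eventlogs_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]

all_args_keys = [str([p, c, e]) for p, c, e in product(platform_cases, cluster_cases, eventlogs_cases)]
assert len(all_args_keys) == 16  # 2 * 4 * 2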