From a3832affb097e6fd224d72dc87f3f58432f1979c Mon Sep 17 00:00:00 2001
From: Niranjan Artal <50492963+nartal1@users.noreply.github.com>
Date: Wed, 5 Jul 2023 07:11:59 -0700
Subject: [PATCH 01/14] Qualification tool to report with granularity on
 unsupported operators (#401)

* Qualification tool report for unsupported operators
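
For context on the new report format, here is a minimal, self-contained sketch (illustrative only, not part of this patch; object and value names are hypothetical) of how a summary entry such as `JSON[string:bigint]` from "Unsupported Read File Formats and Types" is expected to map to the App ID, Unsupported Type, Details, and Notes columns of the new per-operator CSV:

```scala
// Illustrative sketch: splits "FORMAT[type1:type2]" into the Details and Notes columns.
object UnsupportedReadRowSketch {
  def toRow(appId: String, entry: String): (String, String, String, String) = {
    val formatAndTypes = entry.split("\\[")
    val format = formatAndTypes(0)
    val notes = if (formatAndTypes.length > 1) {
      s"Types not supported - ${formatAndTypes(1).replace("]", "")}"
    } else {
      ""
    }
    // Columns: App ID, Unsupported Type, Details, Notes
    (appId, "Read", format, notes)
  }

  def main(args: Array[String]): Unit = {
    // Prints (app-123,Read,JSON,Types not supported - string:bigint)
    println(toRow("app-123", "JSON[string:bigint]"))
  }
}
```

Exec, expression, and write-format entries follow the same row shape, with an empty Notes column except for execs whose expressions are unsupported.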

---------

Signed-off-by: Niranjan Artal <nartal@nvidia.com>
---
 .../planparser/FileSourceScanExecParser.scala |   5 +-
 .../tool/qualification/QualOutputWriter.scala | 120 +++++++++++++++++-
 .../tool/qualification/Qualification.scala    |   1 +
 .../qualification/QualificationAppInfo.scala  |  18 ++-
 .../complex_dec_expectation.csv               |   2 +-
 .../jdbc_expectation.csv                      |   2 +-
 .../nds_q86_fail_test_expectation.csv         |   2 +-
 .../nds_q86_test_expectation.csv              |   2 +-
 .../qual_test_simple_expectation.csv          |   6 +-
 .../read_dsv1_expectation.csv                 |   2 +-
 .../write_format_expectation.csv              |   2 +-
 .../qualification/QualificationSuite.scala    |  35 ++++++
 12 files changed, 184 insertions(+), 13 deletions(-)

diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/FileSourceScanExecParser.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/FileSourceScanExecParser.scala
index 2a8a703cc..f1b70b9c3 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/FileSourceScanExecParser.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/FileSourceScanExecParser.scala
@@ -32,6 +32,9 @@ case class FileSourceScanExecParser(
   val fullExecName = "FileSourceScanExec"
 
   override def parse: ExecInfo = {
+    // Remove trailing spaces from node name
+    // Example: "Scan parquet " -> "Scan parquet"
+    val nodeName = node.name.trim
     val accumId = node.metrics.find(_.name == "scan time").map(_.accumulatorId)
     val maxDuration = SQLPlanParser.getTotalDuration(accumId, app)
 
@@ -42,6 +45,6 @@ case class FileSourceScanExecParser(
     val overallSpeedup = Math.max((speedupFactor * score), 1.0)
 
     // TODO - add in parsing expressions - average speedup across?
-    new ExecInfo(sqlID, node.name, "", overallSpeedup, maxDuration, node.id, score > 0, None)
+    new ExecInfo(sqlID, nodeName, "", overallSpeedup, maxDuration, node.id, score > 0, None)
   }
 }
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualOutputWriter.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualOutputWriter.scala
index 7e37d9b18..96215552c 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualOutputWriter.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualOutputWriter.scala
@@ -16,7 +16,7 @@
 
 package com.nvidia.spark.rapids.tool.qualification
 
-import scala.collection.mutable.{Buffer, LinkedHashMap, ListBuffer}
+import scala.collection.mutable.{ArrayBuffer, Buffer, LinkedHashMap, ListBuffer}
 
 import com.nvidia.spark.rapids.tool.ToolTextFileWriter
 import com.nvidia.spark.rapids.tool.planparser.{ExecInfo, PlanInfo}
@@ -146,6 +146,22 @@ class QualOutputWriter(outputDir: String, reportReadSchema: Boolean,
     }
   }
 
+  def writeUnsupportedOperatorsCSVReport(sums: Seq[QualificationSummaryInfo],
+      order: String): Unit = {
+    val csvFileWriter = new ToolTextFileWriter(outputDir,
+      s"${QualOutputWriter.LOGFILE_NAME}_unsupportedOperators.csv",
+      "Unsupported Operators CSV Report", hadoopConf)
+    val headersAndSizes = QualOutputWriter.getUnsupportedOperatorsHeaderStringsAndSizes(sums)
+    csvFileWriter.write(QualOutputWriter.constructOutputRowFromMap(headersAndSizes,
+      QualOutputWriter.CSV_DELIMITER, false))
+    sums.foreach { sum =>
+      val rows = QualOutputWriter.constructUnsupportedOperatorsInfo(sum, headersAndSizes,
+        QualOutputWriter.CSV_DELIMITER, false)
+      rows.foreach(row => csvFileWriter.write(row))
+    }
+    csvFileWriter.close()
+  }
+
   def writePerSqlCSVReport(sums: Seq[QualificationSummaryInfo], maxSQLDescLength: Int): Unit = {
     val csvFileWriter = new ToolTextFileWriter(outputDir,
       s"${QualOutputWriter.LOGFILE_NAME}_persql.csv",
@@ -362,6 +377,9 @@ object QualOutputWriter {
   val CLUSTER_TAGS = "Cluster Tags"
   val CLUSTER_ID = "ClusterId"
   val JOB_ID = "JobId"
+  val UNSUPPORTED_TYPE = "Unsupported Type"
+  val DETAILS = "Details"
+  val NOTES = "Notes"
   val RUN_NAME = "RunName"
   val ESTIMATED_FREQUENCY = "Estimated Job Frequency (monthly)"
   val ML_FUNCTIONS = "ML Functions"
@@ -493,6 +511,18 @@ object QualOutputWriter {
     prettyPrintValue
   }
 
+  def getUnsupportedOperatorsHeaderStringsAndSizes(
+      appInfos: Seq[QualificationSummaryInfo]): LinkedHashMap[String, Int] = {
+    val detailedHeaderAndFields = LinkedHashMap[String, Int](
+      APP_ID_STR -> QualOutputWriter.getAppIdSize(appInfos),
+      UNSUPPORTED_TYPE -> UNSUPPORTED_TYPE.size,
+      DETAILS -> DETAILS.size,
+      NOTES -> NOTES.size
+    )
+    detailedHeaderAndFields
+  }
+
+
   def getDetailedHeaderStringsAndSizes(appInfos: Seq[QualificationSummaryInfo],
       reportReadSchema: Boolean): LinkedHashMap[String, Int] = {
     val detailedHeadersAndFields = LinkedHashMap[String, Int](
@@ -831,6 +861,93 @@ object QualOutputWriter {
     }
   }
 
+  def constructUnsupportedOperatorsInfo(
+      sumInfo: QualificationSummaryInfo,
+      headersAndSizes: LinkedHashMap[String, Int],
+      delimiter: String = TEXT_DELIMITER,
+      prettyPrint: Boolean,
+      reformatCSV: Boolean = true): Seq[String] = {
+    val reformatCSVFunc: String => String =
+      if (reformatCSV) str => StringUtils.reformatCSVString(str) else str => stringIfempty(str)
+    val appId = sumInfo.appId
+    val readFormat = sumInfo.readFileFormatAndTypesNotSupported
+    val writeFormat = sumInfo.writeDataFormat
+    val unsupportedExecs = sumInfo.unSupportedExecs
+    val unsupportedExprs = sumInfo.unSupportedExprs
+    val unsupportedExecExprsMap = sumInfo.unsupportedExecstoExprsMap
+    val unsupportedOperatorsOutputRows = new ArrayBuffer[String]()
+
+    if (readFormat.nonEmpty) {
+      val unsupportedReadFormatRows = readFormat.map { format =>
+        val readFormatAndType = format.split("\\[")
+        val readFormat = readFormatAndType(0)
+        val readType = if (readFormatAndType.size > 1) {
+          s"Types not supported - ${readFormatAndType(1).replace("]", "")}"
+        } else {
+          ""
+        }
+        val data = ListBuffer(
+          reformatCSVFunc(appId) -> headersAndSizes(APP_ID_STR),
+          reformatCSVFunc("Read")-> headersAndSizes(UNSUPPORTED_TYPE),
+          reformatCSVFunc(readFormat) -> headersAndSizes(DETAILS),
+          reformatCSVFunc(readType) -> headersAndSizes(NOTES)
+        )
+        constructOutputRow(data, delimiter, prettyPrint)
+      }
+      unsupportedOperatorsOutputRows ++= unsupportedReadFormatRows
+    }
+    if (unsupportedExecs.nonEmpty) {
+      val unsupportedExecRows = unsupportedExecs.split(";").map { exec =>
+        val data = ListBuffer(
+          reformatCSVFunc(appId) -> headersAndSizes(APP_ID_STR),
+          reformatCSVFunc("Exec") -> headersAndSizes(UNSUPPORTED_TYPE),
+          reformatCSVFunc(exec) -> headersAndSizes(DETAILS),
+          reformatCSVFunc("") -> headersAndSizes(NOTES)
+        )
+        constructOutputRow(data, delimiter, prettyPrint)
+      }
+      unsupportedOperatorsOutputRows ++= unsupportedExecRows
+    }
+    if (unsupportedExecExprsMap.nonEmpty) {
+      val unsupportedExecExprMapRows = unsupportedExecExprsMap.map { case (exec, exprs) =>
+        val data = ListBuffer(
+          reformatCSVFunc(appId) -> headersAndSizes(APP_ID_STR),
+          reformatCSVFunc("Exec") -> headersAndSizes(UNSUPPORTED_TYPE),
+          reformatCSVFunc(exec) -> headersAndSizes(DETAILS),
+          reformatCSVFunc("$exec Exec is not supported as expressions are " +
+            "not supported -  `${exprs}`") -> headersAndSizes(NOTES)
+        )
+        constructOutputRow(data, delimiter, prettyPrint)
+      }.toArray
+      unsupportedOperatorsOutputRows ++= unsupportedExecExprMapRows
+    }
+    if (unsupportedExprs.nonEmpty) {
+      val unsupportedExprRows = unsupportedExprs.split(";").map { expr =>
+        val data = ListBuffer(
+          reformatCSVFunc(appId) -> headersAndSizes(APP_ID_STR),
+          reformatCSVFunc("Expression") -> headersAndSizes(UNSUPPORTED_TYPE),
+          reformatCSVFunc(expr) -> headersAndSizes(DETAILS),
+          reformatCSVFunc("") -> headersAndSizes(NOTES)
+        )
+        constructOutputRow(data, delimiter, prettyPrint)
+      }
+      unsupportedOperatorsOutputRows ++= unsupportedExprRows
+    }
+    if (writeFormat.nonEmpty) {
+      val unsupportedWriteFormatRows = writeFormat.map { format =>
+        val data = ListBuffer(
+          reformatCSVFunc(appId) -> headersAndSizes(APP_ID_STR),
+          reformatCSVFunc("Write") -> headersAndSizes(UNSUPPORTED_TYPE),
+          reformatCSVFunc(format) -> headersAndSizes(DETAILS),
+          reformatCSVFunc("") -> headersAndSizes(NOTES)
+        )
+        constructOutputRow(data, delimiter, prettyPrint)
+      }
+      unsupportedOperatorsOutputRows ++= unsupportedWriteFormatRows
+    }
+    unsupportedOperatorsOutputRows
+  }
+
   def getAllExecsFromPlan(plans: Seq[PlanInfo]): Set[ExecInfo] = {
     val topExecInfo = plans.flatMap(_.execInfo)
     topExecInfo.flatMap { e =>
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala
index 2ae0253e3..d3b24d32f 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala
@@ -88,6 +88,7 @@ class Qualification(outputDir: String, numRows: Int, hadoopConf: Configuration,
     }
     qWriter.writeExecReport(allAppsSum, order)
     qWriter.writeStageReport(allAppsSum, order)
+    qWriter.writeUnsupportedOperatorsCSVReport(allAppsSum, order)
     if (mlOpsEnabled) {
       if (allAppsSum.exists(x => x.mlFunctions.nonEmpty)) {
         qWriter.writeMlFuncsReports(allAppsSum, order)
diff --git a/core/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualificationAppInfo.scala b/core/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualificationAppInfo.scala
index 4cef83d36..ba47ca89b 100644
--- a/core/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualificationAppInfo.scala
+++ b/core/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualificationAppInfo.scala
@@ -439,7 +439,7 @@ class QualificationAppInfo(
       val supportedSQLTaskDuration = calculateSQLSupportedTaskDuration(allStagesSummary)
       val taskSpeedupFactor = calculateSpeedupFactor(allStagesSummary)
       // Get all the unsupported Execs from the plan
-      val unSupportedExecs = origPlanInfos.flatMap { p =>
+      val unSupportedExecs = planInfos.flatMap { p =>
         // WholeStageCodeGen is excluded from the result.
         val topLevelExecs = p.execInfo.filterNot(_.isSupported).filterNot(
           x => x.exec.startsWith("WholeStage"))
@@ -454,6 +454,19 @@ class QualificationAppInfo(
         _.unsupportedExprs)).flatten.filter(_.nonEmpty).toSet.mkString(";")
         .trim.replaceAll("\n", "").replace(",", ":")
 
+      // Get all unsupported execs and expressions from the plan in form of map[exec -> exprs]
+      val unsupportedExecExprsMap = planInfos.flatMap { p =>
+        val topLevelExecs = p.execInfo.filterNot(_.isSupported).filterNot(
+          x => x.exec.startsWith("WholeStage"))
+        val childrenExecs = p.execInfo.flatMap { e =>
+          e.children.map(x => x.filterNot(_.isSupported))
+        }.flatten
+        val execs = topLevelExecs ++ childrenExecs
+        val exprs = execs.filter(_.unsupportedExprs.nonEmpty).map(
+          e => e.exec -> e.unsupportedExprs.mkString(";")).toMap
+        exprs
+      }.toMap
+
       // check if there are any SparkML/XGBoost functions or expressions if the mlOpsEnabled
       // config is true
       val mlFunctions = if (mlOpsEnabled) {
@@ -494,7 +507,7 @@ class QualificationAppInfo(
         taskSpeedupFactor, info.sparkUser, info.startTime, origPlanInfos,
         perSqlStageSummary.map(_.stageSum).flatten, estimatedInfo, perSqlInfos,
         unSupportedExecs, unSupportedExprs, clusterTags, allClusterTagsMap, mlFunctions,
-        mlTotalStageDuration)
+        mlTotalStageDuration, unsupportedExecExprsMap)
     }
   }
 
@@ -708,6 +721,7 @@ case class QualificationSummaryInfo(
     allClusterTagsMap: Map[String, String],
     mlFunctions: Option[Seq[MLFunctions]],
     mlFunctionsStageDurations: Option[Seq[MLFuncsStageDuration]],
+    unsupportedExecstoExprsMap: Map[String, String],
     estimatedFrequency: Option[Long] = None)
 
 case class StageQualSummaryInfo(
diff --git a/core/src/test/resources/QualificationExpectations/complex_dec_expectation.csv b/core/src/test/resources/QualificationExpectations/complex_dec_expectation.csv
index 0a0878ef9..9d29c9106 100644
--- a/core/src/test/resources/QualificationExpectations/complex_dec_expectation.csv
+++ b/core/src/test/resources/QualificationExpectations/complex_dec_expectation.csv
@@ -1,2 +1,2 @@
 App Name,App ID,Recommendation,Estimated GPU Speedup,Estimated GPU Duration,Estimated GPU Time Saved,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,Task Speedup Factor,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly)
-"Spark shell","local-1626104300434","Not Recommended",1.01,129484.66,1619.33,2429,1469,131104,2429,88.35,"","","","struct<firstname:string,middlename:array<string>,lastname:string>;struct<current:struct<state:string,city:string>,previous:struct<state:map<string,string>,city:string>>;array<struct<city:string,state:string>>;map<string,string>;map<string,array<string>>;map<string,map<string,string>>;array<array<string>>;array<string>","struct<firstname:string,middlename:array<string>,lastname:string>;struct<current:struct<state:string,city:string>,previous:struct<state:map<string,string>,city:string>>;array<struct<city:string,state:string>>;map<string,array<string>>;map<string,map<string,string>>;array<array<string>>","NESTED COMPLEX TYPE",1260,128847,0,1469,3.0,false,"CollectLimit;ColumnarToRow","",30
+"Spark shell","local-1626104300434","Not Recommended",1.01,129484.66,1619.33,2429,1469,131104,2429,88.35,"","","","struct<firstname:string,middlename:array<string>,lastname:string>;struct<current:struct<state:string,city:string>,previous:struct<state:map<string,string>,city:string>>;array<struct<city:string,state:string>>;map<string,string>;map<string,array<string>>;map<string,map<string,string>>;array<array<string>>;array<string>","struct<firstname:string,middlename:array<string>,lastname:string>;struct<current:struct<state:string,city:string>,previous:struct<state:map<string,string>,city:string>>;array<struct<city:string,state:string>>;map<string,array<string>>;map<string,map<string,string>>;array<array<string>>","NESTED COMPLEX TYPE",1260,128847,0,1469,3.0,false,"CollectLimit","",30
diff --git a/core/src/test/resources/QualificationExpectations/jdbc_expectation.csv b/core/src/test/resources/QualificationExpectations/jdbc_expectation.csv
index 06197a211..eb8a72b10 100644
--- a/core/src/test/resources/QualificationExpectations/jdbc_expectation.csv
+++ b/core/src/test/resources/QualificationExpectations/jdbc_expectation.csv
@@ -1,2 +1,2 @@
 App Name,App ID,Recommendation,Estimated GPU Speedup,Estimated GPU Duration,Estimated GPU Time Saved,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,Task Speedup Factor,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly)
-"Spark shell","app-20211019113801-0001","Not Recommended",1.0,569385.42,2581.57,3627,19894,571967,3503,28.41,"","JDBC[*]","","","","",1812,544575,677,19217,3.8,false,"Scan JDBCRelation(TBLS) [numPartitions=1] ;Execute CreateViewCommand;CollectLimit","",30
+"Spark shell","app-20211019113801-0001","Not Recommended",1.0,569385.42,2581.57,3627,19894,571967,3503,28.41,"","JDBC[*]","","","","",1812,544575,677,19217,3.8,false,"Scan JDBCRelation(TBLS) [numPartitions=1];Execute CreateViewCommand;CollectLimit","",30
diff --git a/core/src/test/resources/QualificationExpectations/nds_q86_fail_test_expectation.csv b/core/src/test/resources/QualificationExpectations/nds_q86_fail_test_expectation.csv
index e5b254683..c646df568 100644
--- a/core/src/test/resources/QualificationExpectations/nds_q86_fail_test_expectation.csv
+++ b/core/src/test/resources/QualificationExpectations/nds_q86_fail_test_expectation.csv
@@ -1,2 +1,2 @@
 App Name,App ID,Recommendation,Estimated GPU Speedup,Estimated GPU Duration,Estimated GPU Time Saved,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,Task Speedup Factor,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly)
-"TPC-DS Like Bench q86","app-20210319163812-1778","Not Applicable",1.36,19120.15,7050.84,9569,4320658,26171,9569,0.0,"24","","","","","",9565,3595714,0,4320658,3.8,false,"Execute CreateViewCommand;ColumnarToRow","",30
+"TPC-DS Like Bench q86","app-20210319163812-1778","Not Applicable",1.36,19120.15,7050.84,9569,4320658,26171,9569,0.0,"24","","","","","",9565,3595714,0,4320658,3.8,false,"Execute CreateViewCommand","",30
diff --git a/core/src/test/resources/QualificationExpectations/nds_q86_test_expectation.csv b/core/src/test/resources/QualificationExpectations/nds_q86_test_expectation.csv
index db532f67c..b58f5b8ef 100644
--- a/core/src/test/resources/QualificationExpectations/nds_q86_test_expectation.csv
+++ b/core/src/test/resources/QualificationExpectations/nds_q86_test_expectation.csv
@@ -1,2 +1,2 @@
 App Name,App ID,Recommendation,Estimated GPU Speedup,Estimated GPU Duration,Estimated GPU Time Saved,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,Task Speedup Factor,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly)
-"TPC-DS Like Bench q86","app-20210319163812-1778","Recommended",1.36,19120.15,7050.84,9569,4320658,26171,9569,35.34,"","","","","","",9565,3595714,0,4320658,3.8,false,"Execute CreateViewCommand;ColumnarToRow","",30
+"TPC-DS Like Bench q86","app-20210319163812-1778","Recommended",1.36,19120.15,7050.84,9569,4320658,26171,9569,35.34,"","","","","","",9565,3595714,0,4320658,3.8,false,"Execute CreateViewCommand","",30
diff --git a/core/src/test/resources/QualificationExpectations/qual_test_simple_expectation.csv b/core/src/test/resources/QualificationExpectations/qual_test_simple_expectation.csv
index 76dbc398d..f43f88c8d 100644
--- a/core/src/test/resources/QualificationExpectations/qual_test_simple_expectation.csv
+++ b/core/src/test/resources/QualificationExpectations/qual_test_simple_expectation.csv
@@ -1,5 +1,5 @@
 App Name,App ID,Recommendation,Estimated GPU Speedup,Estimated GPU Duration,Estimated GPU Time Saved,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,Task Speedup Factor,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly)
 "Rapids Spark Profiling Tool Unit Tests","local-1622043423018","Recommended",1.92,8472.65,7846.34,12434,132257,16319,10589,37.7,"","","JSON","","","",7143,4717,19616,112641,3.86,false,"SerializeFromObject;Execute InsertIntoHadoopFsRelationCommand json;DeserializeToObject;Filter;MapElements;Scan","",1
-"Spark shell","local-1651187225439","Not Recommended",1.0,355483.43,153.56,760,180,355637,350,87.88,"","JSON[string:bigint:int]","","","","",498,343411,97,83,1.78,false,"SerializeFromObject;CollectLimit;Scan json ;DeserializeToObject;Filter;MapElements","",1
-"Spark shell","local-1651188809790","Not Recommended",1.0,166199.97,15.02,911,283,166215,45,81.18,"","JSON[string:bigint:int]","","","","UDF",715,133608,269,14,1.5,false,"CollectLimit;Scan json ;Project","UDF",1
-"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","Not Recommended",1.0,6240.0,0.0,2032,4666,6240,0,46.27,"","JSON[string:bigint:int]","JSON","","","UDF",1209,5793,4664,2,1.0,false,"Scan json ;Execute InsertIntoHadoopFsRelationCommand json;LocalTableScan;Project;Execute CreateViewCommand","UDF",1
+"Spark shell","local-1651187225439","Not Recommended",1.0,355483.43,153.56,760,180,355637,350,87.88,"","JSON[string:bigint:int]","","","","",498,343411,97,83,1.78,false,"SerializeFromObject;CollectLimit;DeserializeToObject;Scan json;Filter;MapElements","",1
+"Spark shell","local-1651188809790","Not Recommended",1.0,166199.97,15.02,911,283,166215,45,81.18,"","JSON[string:bigint:int]","","","","UDF",715,133608,269,14,1.5,false,"CollectLimit;Scan json;Project","UDF",1
+"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","Not Recommended",1.0,6240.0,0.0,2032,4666,6240,0,46.27,"","JSON[string:bigint:int]","JSON","","","UDF",1209,5793,4664,2,1.0,false,"Execute InsertIntoHadoopFsRelationCommand json;LocalTableScan;Project;Scan json;Execute CreateViewCommand","UDF",1
diff --git a/core/src/test/resources/QualificationExpectations/read_dsv1_expectation.csv b/core/src/test/resources/QualificationExpectations/read_dsv1_expectation.csv
index b600f9dca..f624cc260 100644
--- a/core/src/test/resources/QualificationExpectations/read_dsv1_expectation.csv
+++ b/core/src/test/resources/QualificationExpectations/read_dsv1_expectation.csv
@@ -1,2 +1,2 @@
 App Name,App ID,Recommendation,Estimated GPU Speedup,Estimated GPU Duration,Estimated GPU Time Saved,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,Task Speedup Factor,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly)
-"Spark shell","local-1624371544219","Not Recommended",1.0,174691.42,601.57,6695,20421,175293,1034,72.15,"","JSON[string:double:date:int:bigint];Text[*]","JSON","","","",1859,175857,17266,3155,2.39,false,"CollectLimit;Scan json ;Execute InsertIntoHadoopFsRelationCommand json;Scan text","",30
+"Spark shell","local-1624371544219","Not Recommended",1.0,174691.42,601.57,6695,20421,175293,1034,72.15,"","JSON[string:double:date:int:bigint];Text[*]","JSON","","","",1859,175857,17266,3155,2.39,false,"CollectLimit;Scan json;Execute InsertIntoHadoopFsRelationCommand json;Scan text","",30
diff --git a/core/src/test/resources/QualificationExpectations/write_format_expectation.csv b/core/src/test/resources/QualificationExpectations/write_format_expectation.csv
index 1f2f10fd6..d18cf6f22 100644
--- a/core/src/test/resources/QualificationExpectations/write_format_expectation.csv
+++ b/core/src/test/resources/QualificationExpectations/write_format_expectation.csv
@@ -1,2 +1,2 @@
 App Name,App ID,Recommendation,Estimated GPU Speedup,Estimated GPU Duration,Estimated GPU Time Saved,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,Task Speedup Factor,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly)
-"Spark shell","local-1629442299891","Not Recommended",1.05,18558.0,996.0,1992,920,19554,1992,91.72,"","","CSV;JSON","","","",1235,16325,0,920,2.0,false,"ColumnarToRow;Execute InsertIntoHadoopFsRelationCommand json;Execute InsertIntoHadoopFsRelationCommand csv","",30
+"Spark shell","local-1629442299891","Not Recommended",1.05,18558.0,996.0,1992,920,19554,1992,91.72,"","","CSV;JSON","","","",1235,16325,0,920,2.0,false,"Execute InsertIntoHadoopFsRelationCommand json;Execute InsertIntoHadoopFsRelationCommand csv","",30
diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala
index 4caf19d88..ab38f9d71 100644
--- a/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala
+++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala
@@ -1107,6 +1107,41 @@ class QualificationSuite extends BaseTestSuite {
     }
   }
 
+  test("test csv output for unsupported operators") {
+    TrampolineUtil.withTempDir { outpath =>
+      val tmpJson = s"$outpath/jsonfile"
+      TrampolineUtil.withTempDir { jsonOutputFile =>
+        val (eventLog, _) = ToolTestUtils.generateEventLog(jsonOutputFile, "jsonFile") { spark =>
+          import spark.implicits._
+          val testData = Seq((1, 2), (3, 4)).toDF("a", "b")
+          testData.write.json(tmpJson)
+          val df = spark.read.json(tmpJson)
+          val res = df.join(df.select($"a" as "a2"), $"a" === $"a2")
+          res
+        }
+        val allArgs = Array(
+          "--output-directory",
+          outpath.getAbsolutePath())
+        val appArgs = new QualificationArgs(allArgs ++ Array(eventLog))
+        val (exit, appSum) = QualificationMain.mainInternal(appArgs)
+        assert(exit == 0)
+
+        val filename = s"$outpath/rapids_4_spark_qualification_output/" +
+          s"rapids_4_spark_qualification_output_unsupportedOperators.csv"
+        val inputSource = Source.fromFile(filename)
+        try {
+          val lines = inputSource.getLines.toSeq
+          // 1 line for the header and 5 lines of unsupported operator rows
+          assert(lines.size == 6)
+          assert(lines.head.contains("App ID,Unsupported Type,"))
+          assert(lines(1).contains("\"Read\",\"JSON\",\"Types not supported - bigint:int\""))
+        } finally {
+          inputSource.close()
+        }
+      }
+    }
+  }
+
   test("running qualification app files with per sql") {
     TrampolineUtil.withTempPath { outParquetFile =>
       TrampolineUtil.withTempPath { outJsonFile =>

From 6f473a850f7441b707dd8135bc3e15f6140bbbec Mon Sep 17 00:00:00 2001
From: Niranjan Artal <50492963+nartal1@users.noreply.github.com>
Date: Wed, 5 Jul 2023 15:05:18 -0700
Subject: [PATCH 02/14] Bug fix for supported expressions (#408)

* Bug fix for supported expressions

Signed-off-by: Niranjan Artal <nartal@nvidia.com>

* Addressed review comments
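
As context for the fix, a minimal sketch (hypothetical names, not the real operator-score data) of the normalization this change relies on: supported expression names are lowercased with backquotes and spaces stripped when they are loaded, so `isExprSupported` can use a plain lowercase lookup without the old underscore stripping and `date_format` special case:

```scala
// Illustrative sketch: normalization of loaded names plus the simplified lookup.
object ExprSupportSketch {
  // Mirrors how supported expression names are normalized when read from the source.
  def normalizeLoadedName(name: String): String =
    name.toLowerCase.replaceAll("\\`", "").replaceAll(" ", "")

  // Mirrors the simplified lookup: lowercase only, no underscore rewriting.
  def isExprSupported(supported: Set[String], expr: String): Boolean =
    supported.contains(expr.toLowerCase)

  def main(args: Array[String]): Unit = {
    val supported = Set("`BRound`", "Promote Precision").map(normalizeLoadedName)
    println(isExprSupported(supported, "BRound"))            // true: backquotes dropped on load
    println(isExprSupported(supported, "PromotePrecision"))  // true: spaces dropped on load
  }
}
```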

---------

Signed-off-by: Niranjan Artal <nartal@nvidia.com>
---
 .../tool/qualification/PluginTypeChecker.scala       | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeChecker.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeChecker.scala
index 3c417eb8b..9986f5ad2 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeChecker.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeChecker.scala
@@ -112,7 +112,7 @@ class PluginTypeChecker(platform: String = "onprem") extends Logging {
     // Some SQL function names have backquotes(`) around their names,
     // so we remove them before saving.
     readSupportedOperators(source, "exprs").map(
-      x => (x._1.toLowerCase.replaceAll("\\`", ""), x._2))
+      x => (x._1.toLowerCase.replaceAll("\\`", "").replaceAll(" ", ""), x._2))
   }
 
   private def readSupportedTypesForPlugin: (
@@ -293,15 +293,7 @@ class PluginTypeChecker(platform: String = "onprem") extends Logging {
   }
 
   def isExprSupported(expr: String): Boolean = {
-    // Remove _ from the string. Example: collect_list => collectlist.
-    // collect_list is alias for CollectList aggregate function.
-    // An exception is date_format since the Expression and sql function name is different
-    // Expression: DateFormatClass, sql function- date_format
-    val exprLowercase = if (expr.equalsIgnoreCase("date_format")) {
-      expr
-    } else {
-      expr.toLowerCase.replace("_", "")
-    }
+    val exprLowercase = expr.toLowerCase
     if (supportedExprs.contains(exprLowercase)) {
       val exprSupported = supportedExprs.getOrElse(exprLowercase, "NS")
       if (exprSupported == "S") {

From bde9db99c5253854e182d0e143ffbfc18bbd3ed4 Mon Sep 17 00:00:00 2001
From: Partho Sarthi <psarthi@nvidia.com>
Date: Fri, 7 Jul 2023 08:06:22 -0700
Subject: [PATCH 03/14] [DOC] Fix documentation and consistency in arguments
 for the EMR user-tools (#413)

* Fix doc and consistency in arguments for the EMR user-tools

---------

Signed-off-by: Partho Sarthi <psarthi@nvidia.com>
---
 user_tools/docs/user-tools-aws-emr.md | 28 +++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/user_tools/docs/user-tools-aws-emr.md b/user_tools/docs/user-tools-aws-emr.md
index fcff70152..80f484b64 100644
--- a/user_tools/docs/user-tools-aws-emr.md
+++ b/user_tools/docs/user-tools-aws-emr.md
@@ -68,18 +68,18 @@ The local deployment runs on the local development machine. It requires:
 
 | Option                         | Description                                                                                                                                                                                                                                                                                                                                                                                                 | Default                                                                                                                                                                                                                       | Required |
 |--------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:|
-| **cpu_cluster**                | The EMR-cluster on which the Apache Spark applications were executed. Accepted values are an EMR-cluster name, or a valid path to the cluster properties file (json format) generated by AWS CLI command `emr describe-cluster`                                                                                                                                                                             | N/A                                                                                                                                                                                                                           |     Y    |
-| **eventlogs**                  | A comma seperated list of S3 urls pointing to event logs or S3 directory                                                                                                                                                                                                                                                                                                                                    | Reads the Spark's property `spark.eventLog.dir` defined in `cpu_cluster`. This property should be included in the output of `emr describe-cluster`. Note that the wrapper will raise an exception if the property is not set. |     N    |
-| **remote_folder**              | The S3 folder where the output of the wrapper's output is copied. If missing, the output will be available only on local disk                                                                                                                                                                                                                                                                               | N/A                                                                                                                                                                                                                           |     N    |
-| **gpu_cluster**                | The EMR-cluster on which the Spark applications is planned to be migrated. The argument can be an EMR-cluster or a valid path to the cluster's properties file (json format) generated by the AWS CLI `emr describe-cluster` command                                                                                                                                                                        | The wrapper maps the EC2 machine instances of the original cluster into EC2 instances that support GPU acceleration                                                                                                           |     N    |
-| **local_folder**               | Local work-directory path to store the output and to be used as root directory for temporary folders/files. The final output will go into a subdirectory named `qual-${EXEC_ID}` where `exec_id` is an auto-generated unique identifier of the execution.                                                                                                                                                   | If the argument is NONE, the default value is the env variable `RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY` if any; or the current working directory.                                                                                 |     N    |
-| **jvm_heap_size**              | The maximum heap size of the JVM in gigabytes                                                                                                                                                                                                                                                                                                                                                               | 24                                                                                                                                                                                                                            |     N    |
-| **profile**                    | A named AWS profile that you can specify to get the settings/credentials of the AWS account                                                                                                                                                                                                                                                                                                                 | "default" if the env-variable `AWS_PROFILE` is not set                                                                                                                                                                        |     N    |
-| **tools_jar**                  | Path to a bundled jar including RAPIDS tool. The path is a local filesystem, or remote S3 url                                                                                                                                                                                                                                                                                                               | Downloads the latest rapids-tools_*.jar from mvn repo                                                                                                                                                                         |     N    |
-| **filter_apps**                | Filtering criteria of the applications listed in the final STDOUT table is one of the following (`NONE`, `SPEEDUPS`, `SAVINGS`). "`NONE`" means no filter applied. "`SPEEDUPS`" lists all the apps that are either '_Recommended_', or '_Strongly Recommended_' based on speedups. "`SAVINGS`" lists all the apps that have positive estimated GPU savings except for the apps that are '_Not Applicable_'. | `SAVINGS`                                                                                                                                                                                                                     |     N    |
-| **gpu_cluster_recommendation** | The type of GPU cluster recommendation to generate. It accepts one of the following (`CLUSTER`, `JOB`, `MATCH`). `MATCH`: keep GPU cluster same number of nodes as CPU cluster; `CLUSTER`: recommend optimal GPU cluster by cost for entire cluster. `JOB`: recommend optimal GPU cluster by cost per job                                                                                                   | `MATCH`                                                                                                                                                                                                                       |     N    |
-| **verbose**                    | True or False to enable verbosity to the wrapper script                                                                                                                                                                                                                                                                                                                                                     | False if `RAPIDS_USER_TOOLS_LOG_DEBUG` is not set                                                                                                                                                                             |     N    |
-| **rapids_options****           | A list of valid [Qualification tool options](../../core/docs/spark-qualification-tool.md#qualification-tool-options). Note that (`output-directory`, `platform`) flags are ignored, and that multiple "spark-property" is not supported.                                                                                                                                                                    | N/A                                                                                                                                                                                                                           |     N    |
+| **cpu_cluster**                | The EMR-cluster on which the Apache Spark applications were executed. Accepted values are an EMR-cluster name, or a valid path to the cluster properties file (json format) generated by AWS CLI command `emr describe-cluster`                                                                                                                                                                             | N/A                                                                                                                                                                                                                           |    Y     |
+| **eventlogs**                  | A comma separated list pointing to event logs or S3 directory                                                                                                                                                                                                                                                                                                                                               | Reads the Spark's property `spark.eventLog.dir` defined in `cpu_cluster`. This property should be included in the output of `emr describe-cluster`. Note that the wrapper will raise an exception if the property is not set. |    N     |
+| **remote_folder**              | The S3 folder where the output of the wrapper's output is copied. If missing, the output will be available only on local disk                                                                                                                                                                                                                                                                               | N/A                                                                                                                                                                                                                           |    N     |
+| **gpu_cluster**                | The EMR-cluster on which the Spark applications is planned to be migrated. The argument can be an EMR-cluster or a valid path to the cluster's properties file (json format) generated by the AWS CLI `emr describe-cluster` command                                                                                                                                                                        | The wrapper maps the EC2 machine instances of the original cluster into EC2 instances that support GPU acceleration                                                                                                           |    N     |
+| **local_folder**               | Local work-directory path to store the output and to be used as root directory for temporary folders/files. The final output will go into a subdirectory named `qual-${EXEC_ID}` where `exec_id` is an auto-generated unique identifier of the execution.                                                                                                                                                   | If the argument is NONE, the default value is the env variable `RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY` if any; or the current working directory.                                                                                 |    N     |
+| **jvm_heap_size**              | The maximum heap size of the JVM in gigabytes                                                                                                                                                                                                                                                                                                                                                               | 24                                                                                                                                                                                                                            |    N     |
+| **profile**                    | A named AWS profile that you can specify to get the settings/credentials of the AWS account                                                                                                                                                                                                                                                                                                                 | "default" if the env-variable `AWS_PROFILE` is not set                                                                                                                                                                        |    N     |
+| **tools_jar**                  | Path to a bundled jar including RAPIDS tool. The path is a local filesystem, or remote S3 url                                                                                                                                                                                                                                                                                                               | Downloads the latest rapids-tools_*.jar from mvn repo                                                                                                                                                                         |    N     |
+| **filter_apps**                | Filtering criteria of the applications listed in the final STDOUT table is one of the following (`NONE`, `SPEEDUPS`, `SAVINGS`). "`NONE`" means no filter applied. "`SPEEDUPS`" lists all the apps that are either '_Recommended_', or '_Strongly Recommended_' based on speedups. "`SAVINGS`" lists all the apps that have positive estimated GPU savings except for the apps that are '_Not Applicable_'. | `SAVINGS`                                                                                                                                                                                                                     |    N     |
+| **gpu_cluster_recommendation** | The type of GPU cluster recommendation to generate. It accepts one of the following (`CLUSTER`, `JOB`, `MATCH`). `MATCH`: keep GPU cluster same number of nodes as CPU cluster; `CLUSTER`: recommend optimal GPU cluster by cost for entire cluster. `JOB`: recommend optimal GPU cluster by cost per job                                                                                                   | `MATCH`                                                                                                                                                                                                                       |    N     |
+| **verbose**                    | True or False to enable verbosity to the wrapper script                                                                                                                                                                                                                                                                                                                                                     | False if `RAPIDS_USER_TOOLS_LOG_DEBUG` is not set                                                                                                                                                                             |    N     |
+| **rapids_options****           | A list of valid [Qualification tool options](../../core/docs/spark-qualification-tool.md#qualification-tool-options). Note that (`output-directory`, `platform`) flags are ignored, and that multiple "spark-property" is not supported.                                                                                                                                                                    | N/A                                                                                                                                                                                                                           |    N     |
 
 #### Use case scenario
 
@@ -89,12 +89,12 @@ A typical workflow to successfully run the `qualification` command in local mode
 2. A user sets up his development machine:
    1. configures Java
    2. installs AWS CLI and configures the profile and the credentials to make sure the AWS CLI
-      commands can access the S3 resources `LOGS_BUCKET`.
+      commands can access the S3 resources at `LOGS_BUCKET`.
    3. installs `spark_rapids_user_tools`
 3. If the results of the wrapper need to be stored on S3, then another s3 uri is required `REMOTE_FOLDER=s3://OUT_BUCKET/`
 4. User defines the EMR-cluster on which the Spark application were running. Note that the cluster does not have to be
    active; but it has to be visible by the  AWS CLI (i.e., can run `aws emr describe-cluster`).
-5. The following script runs qualification by passing an AWS profile and S3 remote directory to store the output:
+5. The following script runs qualification by passing an AWS profile and a remote S3 directory to store the output (the user needs to define `LOGS_BUCKET`):
    
    ```
    # define the wrapper cache directory if necessary

From e62cf79911d21b4a084cc723eaea7021d3069ca3 Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Mon, 10 Jul 2023 14:36:41 -0500
Subject: [PATCH 04/14] Remove serverless execution mode from the EMR-wrapper
 (#421)

Signed-off-by: Ahmed Hussein (amahussein) <a@ahussein.me>
---
 user_tools/docs/index.md                      |  46 ----
 user_tools/docs/user-tools-aws-emr.md         |  93 -------
 .../cloud_api/databricks_aws.py               |   3 -
 .../cloud_api/databricks_azure.py             |   3 -
 .../cloud_api/dataproc.py                     |   3 -
 .../cloud_api/dataproc_job.py                 |  15 +-
 .../src/spark_rapids_pytools/cloud_api/emr.py |  15 +-
 .../spark_rapids_pytools/cloud_api/emr_job.py | 258 +-----------------
 .../spark_rapids_pytools/cloud_api/onprem.py  |   3 -
 .../cloud_api/sp_types.py                     |   3 -
 .../rapids/qualification.py                   |   9 +-
 .../wrappers/emr_wrapper.py                   | 111 +-------
 12 files changed, 12 insertions(+), 550 deletions(-)

diff --git a/user_tools/docs/index.md b/user_tools/docs/index.md
index 1c58a3ac7..ce4cad5f1 100644
--- a/user_tools/docs/index.md
+++ b/user_tools/docs/index.md
@@ -68,10 +68,6 @@ The wrapper has the following deployment modes:
    1. The cloud SDK is installed and configured correctly to access the resources such as event logs.
    2. An active cluster is running before issuing the wrapper command.
    3. The active cluster is running Spark 3.x
-3. **serverless**: The wrapper submits a serverless job that does not require setting up a cluster ahead of time.
-   This mode requires the following:
-   1. The CSP supports batch/serverless submissions.
-   2. The cloud SDK is installed and configured correctly to access the resources such as event logs.
 
 
 ## Supported platforms
@@ -85,136 +81,94 @@ The following table summarizes the commands supported for each cloud platform:
 | EMR              | qualification | local       | spark_rapids_user_tools \               |  23.02+  |
 |                  |               |             |   emr qualification [ARGS]              |          |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  | spark_rapids_user_tools \               |  23.02+  |
-|                  |               |             |   emr qualification [ARGS] \            |          |
-|                  |               |             |   --mode=serverless                     |          |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | profiling     | local       |           unsupported                   |    N/A   |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | bootstrap     | local       | spark_rapids_user_tools \               |  23.02+  |
 |                  |               |             |   emr bootstrap [ARGS]                  |          |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     | spark_rapids_user_tools \               |  23.02+  |
 |                  |               |             |   emr bootstrap [ARGS]                  |          |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | diagnostic    | local       |           unsupported                   |    N/A   |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     | spark_rapids_user_tools \               |  23.06+  |
 |                  |               |             |   emr diagnostic [ARGS]                 |          |
 +------------------+---------------+-------------+-----------------------------------------+----------+
 | Dataproc         | qualification | local       | spark_rapids_user_tools \               | 23.02.1+ |
 |                  |               |             |   dataproc qualification [ARGS]         |          |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     | spark_rapids_user_tools \               |  23.04+  |
 |                  |               |             |   dataproc qualification [ARGS]         |          |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | profiling     | local       | spark_rapids_user_tools \               | 23.02.1+ |
 |                  |               |             |   dataproc profiling [ARGS]             |          |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | bootstrap     | local       | spark_rapids_user_tools \               | 23.02.1+ |
 |                  |               |             |   dataproc bootstrap [ARGS]             |          |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | diagnostic    | local       |           unsupported                   |    N/A   |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     | spark_rapids_user_tools \               |  23.06+  |
 |                  |               |             |   dataproc diagnostic [ARGS]            |          |
 +------------------+---------------+-------------+-----------------------------------------+----------+
 | Databricks_AWS   | qualification | local       | spark_rapids_user_tools \               |  23.04+  |
 |                  |               |             |   databricks-aws qualification [ARGS]   |          |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | profiling     | local       | spark_rapids_user_tools \               |  23.08+  |
 |                  |               |             |   databricks-aws profiling [ARGS]       |          |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | bootstrap     | local       |           unsupported                   |    N/A   |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | diagnostic    | local       |           unsupported                   |    N/A   |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 +------------------+---------------+-------------+-----------------------------------------+----------+
 | Databricks_Azure | qualification | local       | spark_rapids_user_tools \               |  23.06+  |
 |                  |               |             |   databricks-azure qualification [ARGS] |          |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | profiling     | local       |           unsupported                   |    N/A   |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | bootstrap     | local       |           unsupported                   |    N/A   |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | diagnostic    | local       |           unsupported                   |    N/A   |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 +------------------+---------------+-------------+-----------------------------------------+----------+
 | OnPrem           | qualification | local       | spark_rapids_user_tools \               |  23.06+  |
 |                  |               |             |   onprem qualification [ARGS]           |          |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | profiling     | local       |           unsupported                   |    N/A   |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | bootstrap     | local       |           unsupported                   |    N/A   |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 |                  +---------------+-------------+-----------------------------------------+----------+
 |                  | diagnostic    | local       |           unsupported                   |    N/A   |
 |                  |               +-------------+-----------------------------------------+----------+
-|                  |               | serverless  |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
 |                  |               | cluster     |           unsupported                   |    N/A   |
 +------------------+---------------+-------------+-----------------------------------------+----------+
 ```
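
For reference, the local-mode commands in the table above all share the same shape. A minimal invocation of the EMR qualification command might look like the following sketch; the cluster name and event-log path are placeholders, and the argument names are the ones used elsewhere in these docs:

```
spark_rapids_user_tools emr qualification \
   --cpu_cluster my-emr-cpu-cluster \
   --eventlogs s3://my-bucket/eventlogs/
```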
diff --git a/user_tools/docs/user-tools-aws-emr.md b/user_tools/docs/user-tools-aws-emr.md
index 80f484b64..a18dfc10b 100644
--- a/user_tools/docs/user-tools-aws-emr.md
+++ b/user_tools/docs/user-tools-aws-emr.md
@@ -119,99 +119,6 @@ A typical workflow to successfully run the `qualification` command in local mode
     ./qual_<YYYYmmddHHmmss>_<0x%08X>/rapids_4_spark_qualification_output/
    ```
 
-
-### Serverless deployment
-
-This deployment option allows running the RAPIDS tools on Amazon EMR-Serverless.
-This option has the following advantages:
-1. Scalability in handling a large pool of event logs
-2. Convenience of not having to spin up a cluster.
-
-Resources to get familiar with Amazon EMR-Serverless:
-- [Getting started with Amazon EMR-Serverless](https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/getting-started.html)
-- [EMRServerless-Spark jobs](https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/jobs-spark.html)
-
-#### Prerequisites
-
-Obtain EMR-Serverless job-arn
-
-In order to submit a job on Amazon EMR-Serverless, you need to have a "_job role-arn_".
-The CLI guide on [Getting started with Amazon EMR Serverless](https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/getting-started.html)
-lists the following steps to obtain the _role-arn_ (dubbed _EMRServerlessS3RuntimeRole_):
-1. Grant permissions to use EMR-Serverless
-2. Prepare storage for EMR-Serverless to save the output of the execution and the event logs
-3. Create a job runtime role:
-   1. Create a trust policy to use for IAM-role (_EMRServerlessS3RuntimeRole_), and save role-ARN in the output
-   2. Use the trust policy to create the IAM-role
-   3. Create a policy file that defines the access to resources (event logs and jars if needed)
-   4. Create an IAM policy using the policy file
-   5. Attach the IAM policy to the job runtime role, and save the job role-arn
-4. _Optional_: create EMR-Serverless application
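
As a rough illustration of step 3, the role setup boils down to three AWS CLI calls. This is only a sketch: the policy documents (`trust-policy.json`, `access-policy.json`) and the policy name are placeholders you would prepare by following the AWS guide linked above.

```
# 3.2: create the runtime role from a prepared trust policy
aws iam create-role --role-name EMRServerlessS3RuntimeRole \
   --assume-role-policy-document file://trust-policy.json
# 3.4: create the policy that grants access to the event logs (and jars if needed)
aws iam create-policy --policy-name EMRServerlessS3AccessPolicy \
   --policy-document file://access-policy.json
# 3.5: attach the policy to the role; the role's ARN is the value passed later as the job role-arn
aws iam attach-role-policy --role-name EMRServerlessS3RuntimeRole \
   --policy-arn arn:aws:iam::<ACCOUNT_ID>:policy/EMRServerlessS3AccessPolicy
```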
-
-
-#### Qualification options
-
-```
-spark_rapids_user_tools emr qualification [options] --mode=serverless
-spark_rapids_user_tools emr qualification --mode=serverless --help
-```
-
-| Option             | Description | Default | Required |
-|--------------------|-------------|---------|----------|
-| **cpu_cluster**    | The EMR-cluster on which the Apache Spark applications were executed. Accepted values are an EMR-cluster name, or a valid path to the cluster properties file (json format) generated by the AWS CLI command `emr describe-cluster` | N/A | Y |
-| **remote_folder**  | The S3 folder where the output is archived | N/A | Y |
-| **job_arn**        | The job runtime role-ARN | N/A | Y |
-| **eventlogs**      | A comma-separated list of S3 URLs pointing to event logs or an S3 directory | Reads the Spark property `spark.eventLog.dir` defined in `cpu_cluster`. This property should be included in the output of `emr describe-cluster`. Note that the wrapper will raise an exception if the property is not set. | N |
-| **app_id**         | The ID of the EMR-serverless application on which to run the job. Note that creating an EMR-serverless application takes a few minutes | The wrapper creates an EMR-serverless application that gets deleted at the end of the execution | N |
-| **gpu_cluster**    | The EMR-cluster to which the Spark applications are planned to be migrated. The argument can be an EMR-cluster or a valid path to the cluster's properties file (json format) generated by the AWS CLI `emr describe-cluster` command | The wrapper maps the EC2 machine instances of the original cluster into EC2 instances that support GPU acceleration | N |
-| **local_folder**   | Local work-directory path to store the output and to be used as root directory for temporary folders/files. | If the argument is `NONE`, the default value is the env variable `RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY` if any; or the current working directory. | N |
-| **profile**        | A named AWS profile that you can specify to get the settings/credentials of the AWS account. | "default" if the env-variable `AWS_PROFILE` is not set | N |
-| **tools_jar**      | Path to a bundled jar including the RAPIDS tool. The path is a local filesystem path, or a remote S3 URL | Downloads the latest `rapids-4-spark-tools_*.jar` from the mvn repo | N |
-| **filter_apps**    | Filtering criteria for the applications listed in the final STDOUT table; one of (`NONE`, `SPEEDUPS`, `SAVINGS`). '`NONE`' means no filter applied. '`SPEEDUPS`' lists all the apps that are either '_Recommended_' or '_Strongly Recommended_' based on speedups. '`SAVINGS`' lists all the apps that have positive estimated GPU savings except for the apps that are '_Not Applicable_'. | `savings` | N |
-| **verbose**        | True or False to enable verbosity of the wrapper script | False if `RAPIDS_USER_TOOLS_LOG_DEBUG` is not set | N |
-| **rapids_options** | A list of valid [Qualification tool options](../../core/docs/spark-qualification-tool.md#qualification-tool-options). Note that (`output-directory`, `platform`) flags are ignored, and that multiple "spark-property" arguments are not supported. | N/A | N |
-
-A typical workflow to successfully run the `qualification` command in serverless mode is described as follows:
-
-1. Store the Apache Spark event logs in an S3 folder.
-2. A user sets up their development machine:
-   1. installs the AWS CLI and configures the profile and the credentials to make sure the AWS CLI
-       commands can access the S3 resources `LOGS_BUCKET`.
-   2. installs `spark_rapids_user_tools`
-3. Another S3 URI is required, `REMOTE_FOLDER=s3://OUT_BUCKET/`, to store the output.
-4. The user creates and sets a job runtime role as described in the previous section.
-5. The user defines the EMR-cluster on which the Spark applications were running. Note that the cluster does not have to be
-   active, but it has to be visible to the AWS CLI (i.e., can run `aws emr describe-cluster`).
-6. The following script runs qualification as an EMR-serverless job by passing an AWS profile,
-   the S3 remote directory to store the output, and the job-ARN:
-
-   ```
-   # define the wrapper cache directory if necessary
-   export RAPIDS_USER_TOOLS_CACHE_FOLDER=my_cache_folder
-   export EVENTLOGS=s3://LOGS_BUCKET/eventlogs/
-   export CLUSTER_NAME=my-emr-cpu-cluster
-   export JOB_ARN="arn:aws:iam::[0-9]+:role/ROLE_NAME"
-   export REMOTE_FOLDER=s3://OUT_BUCKET/wrapper_output
-   export MY_AWS_PROFILE=my-aws-profile
-   
-   spark_rapids_user_tools emr qualification \
-      --eventlogs $EVENTLOGS \
-      --cpu_cluster $CLUSTER_NAME \
-      --profile $MY_AWS_PROFILE \
-      --remote_folder $REMOTE_FOLDER \
-      --job_arn $JOB_ARN \
-      --mode=serverless
-   ```
-   The wrapper generates a unique ID for each execution in the format `qual_<YYYYmmddHHmmss>_<0x%08X>`.
-   The above command will generate an S3 directory containing `qualification_summary.csv` in addition to
-   the actual folder of the RAPIDS Qualification tool. If `local_folder` is passed as an argument, then
-   the wrapper mirrors the remote folder on local disk.
-
-   ```
-    $REMOTE_FOLDER/qual_<YYYYmmddHHmmss>_<0x%08X>/qualification_summary.csv
-    $REMOTE_FOLDER/qual_<YYYYmmddHHmmss>_<0x%08X>/rapids_4_spark_qualification_output/
-   ```
-
 ### Qualification output
 
 For each app, the command output lists the following fields:
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/databricks_aws.py b/user_tools/src/spark_rapids_pytools/cloud_api/databricks_aws.py
index 5af464da4..6bd4591ae 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/databricks_aws.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/databricks_aws.py
@@ -80,9 +80,6 @@ def create_saving_estimator(self,
                                                  source_cluster=source_cluster)
         return saving_estimator
 
-    def create_submission_job(self, job_prop, ctxt) -> Any:
-        pass
-
     def create_local_submission_job(self, job_prop, ctxt) -> Any:
         return DBAWSLocalRapidsJob(prop_container=job_prop, exec_ctxt=ctxt)
 
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/databricks_azure.py b/user_tools/src/spark_rapids_pytools/cloud_api/databricks_azure.py
index b9fce9b93..18fa0fafe 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/databricks_azure.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/databricks_azure.py
@@ -81,9 +81,6 @@ def create_saving_estimator(self,
                                                    source_cluster=source_cluster)
         return saving_estimator
 
-    def create_submission_job(self, job_prop, ctxt) -> Any:
-        pass
-
     def create_local_submission_job(self, job_prop, ctxt) -> Any:
         return DBAzureLocalRapidsJob(prop_container=job_prop, exec_ctxt=ctxt)
 
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
index 7823adf12..b6c4f5124 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
@@ -112,9 +112,6 @@ def create_saving_estimator(self,
                                                     source_cluster=source_cluster)
         return saving_estimator
 
-    def create_submission_job(self, job_prop, ctxt) -> Any:
-        pass
-
     def create_local_submission_job(self, job_prop, ctxt) -> Any:
         return DataprocLocalRapidsJob(prop_container=job_prop, exec_ctxt=ctxt)
 
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_job.py b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_job.py
index b06a7ab35..7093c08cf 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_job.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_job.py
@@ -16,7 +16,7 @@
 
 from dataclasses import dataclass
 
-from spark_rapids_pytools.rapids.rapids_job import RapidsJob, RapidsLocalJob, RapidsSubmitSparkJob
+from spark_rapids_pytools.rapids.rapids_job import RapidsLocalJob, RapidsSubmitSparkJob
 
 
 @dataclass
@@ -33,16 +33,3 @@ class DataprocSubmitSparkRapidsJob(RapidsSubmitSparkJob):
     Implementation of a RAPIDS job that runs on a remote cluster.
     """
     job_label = 'dataprocRemoteSparkJobSubmission'
-
-
-@dataclass
-class DataprocServerlessRapidsJob(RapidsJob):
-    """
-    An implementation that uses Dataproc-Serverless to run the RAPIDS accelerator tool.
-    """
-
-    def _build_submission_cmd(self):
-        pass
-
-    def _submit_job(self, cmd_args: list) -> str:
-        pass
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/emr.py b/user_tools/src/spark_rapids_pytools/cloud_api/emr.py
index d3c136eb5..1c37519a9 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/emr.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/emr.py
@@ -19,7 +19,7 @@
 from dataclasses import field, dataclass
 from typing import Any, List
 
-from spark_rapids_pytools.cloud_api.emr_job import EmrServerlessRapidsJob, EmrLocalRapidsJob
+from spark_rapids_pytools.cloud_api.emr_job import EmrLocalRapidsJob
 from spark_rapids_pytools.cloud_api.s3storage import S3StorageDriver
 from spark_rapids_pytools.cloud_api.sp_types import PlatformBase, ClusterBase, CMDDriverBase, \
     CloudPlatform, ClusterState, SparkNodeType, ClusterNode, GpuHWInfo, SysInfo, GpuDevice, \
@@ -29,7 +29,6 @@
 from spark_rapids_pytools.common.utilities import Utils
 from spark_rapids_pytools.pricing.emr_pricing import EMREc2PriceProvider
 from spark_rapids_pytools.pricing.price_provider import SavingsEstimator
-from spark_rapids_pytools.rapids.rapids_job import RapidsJobPropContainer, RapidsJob
 
 
 @dataclass
@@ -91,15 +90,6 @@ def validate_job_submission_args(self, submission_args: dict) -> dict:
         :return: a dictionary with the processed arguments.
         """
         # TODO: verify that all arguments are valid
-        valid_keys = ['execution-role-arn', 'application-id']
-        for submit_arg in submission_args:
-            if submit_arg not in valid_keys:
-                raise RuntimeError(f'Invalid submission argument [{submit_arg}]. Accepted arguments: {valid_keys}.')
-            if submit_arg == 'application-id' and submission_args.get(submit_arg) is None:
-                # show a message that the appID is not passed
-                self.cli.logger.warning('The EMR-Serverless application-ID is not set. '
-                                        'Note that it is recommended to use a pre-existing SPARK EMR-Serverless '
-                                        'application-id to reduce the overhead of initializing the job.')
         return submission_args
 
     def create_saving_estimator(self,
@@ -117,9 +107,6 @@ def create_saving_estimator(self,
                                                source_cluster=source_cluster)
         return saving_estimator
 
-    def create_submission_job(self, job_prop: RapidsJobPropContainer, ctxt) -> RapidsJob:
-        return EmrServerlessRapidsJob(prop_container=job_prop, exec_ctxt=ctxt)
-
     def create_local_submission_job(self, job_prop, ctxt) -> Any:
         return EmrLocalRapidsJob(prop_container=job_prop, exec_ctxt=ctxt)
 
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/emr_job.py b/user_tools/src/spark_rapids_pytools/cloud_api/emr_job.py
index b0b33d746..5e800bbbf 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/emr_job.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/emr_job.py
@@ -14,262 +14,8 @@
 
 """Implementation of Job submissions on EMR"""
 
-import json
-import time
-from dataclasses import field, dataclass
-from logging import Logger
-
-from spark_rapids_pytools.cloud_api.sp_types import EnumeratedType
-from spark_rapids_pytools.common.prop_manager import JSONPropertiesContainer
-from spark_rapids_pytools.common.utilities import ToolLogging, Utils
-from spark_rapids_pytools.rapids.rapids_job import RapidsJob, RapidsLocalJob
-from spark_rapids_pytools.rapids.tool_ctxt import ToolContext
-
-
-class EMRJobState(EnumeratedType):
-    """
-    Standard states for an EMR job.
-    """
-    SUBMITTED = 'submitted'
-    PENDING = 'Pending'
-    SCHEDULED = 'Scheduled'
-    RUNNING = 'Running'
-    FAILED = 'Failed'
-    SUCCESS = 'Success'
-    CANCELLING = 'Cancelling'
-    CANCELLED = 'Cancelled'
-    UNKNOWN = 'Unknown'
-
-
-class EMRAppState(EnumeratedType):
-    """
-    Standard states for an EMR application.
-    Creating 	The application is being prepared and isn't ready to use yet.
-    Created 	The application has been created but hasn't provisioned capacity yet.
-                 You can modify the application to change its initial capacity configuration.
-    Starting 	The application is starting and is provisioning capacity.
-    Started 	The application is ready to accept new jobs. The application only accepts jobs when
-                it's in this state.
-    Stopping 	All jobs have completed and the application is releasing its capacity.
-    Stopped 	The application is stopped and no resources are running on the application.
-                You can modify the application to change its initial capacity configuration.
-    Terminated 	The application has been terminated and doesn't appear on your application list.
-    """
-    CREATING = 'Creating'
-    CREATED = 'Created'
-    STARTING = 'Starting'
-    STARTED = 'Started'
-    STOPPING = 'Stopping'
-    STOPPED = 'Stopped'
-    TERMINATED = 'Terminated'
-
-
-class EMRAppType(EnumeratedType):
-    """
-    Standard types for EMR application types.
-    """
-    SPARK = 'SPARK'
-
-
-@dataclass
-class EMRServerlessApplication:
-    """
-    A wrapper that encapsulates an EMR-serverless application created to host the submitted
-    RAPIDS tool job through EMR-serverless.
-    """
-    app_name: str = None
-    id: str = None
-    exec_ctxt: ToolContext = None
-    app_type: EMRAppType = EMRAppType.SPARK
-    state: EMRAppState = field(default=None, init=False)
-    details: JSONPropertiesContainer = field(default=None, init=False)
-    outlive_submission: bool = field(default=False, init=False)
-    logger: Logger = field(default=None, init=False)
-
-    def __post_init__(self):
-        # when debug is set to true set it in the environment.
-        self.logger = ToolLogging.get_and_setup_logger('rapids.tools.submit.app')
-        if self.id is not None:
-            self.outlive_submission = True
-            self._update_status()
-        else:
-            # if name is none auto generate a new one:
-            if self.app_name is None:
-                self.app_name = f'rapids-tools-{Utils.gen_random_string(8)}'
-            self.outlive_submission = False
-            self._create_as_new()
-
-    def _update_status(self):
-        cmd_args = [
-            'aws',
-            'emr-serverless',
-            'get-application',
-            '--application-id',
-            self.id
-        ]
-        std_out = self.exec_ctxt.platform.cli.run_sys_cmd(cmd_args)
-        self.details = JSONPropertiesContainer(prop_arg=std_out, file_load=False)
-        self.state = EMRAppState.fromstring(self.details.get_value('application', 'state'))
-        self.app_name = self.details.get_value('application', 'name')
-
-    def _create_as_new(self):
-        cmd_args = [
-            'aws',
-            'emr-serverless',
-            'create-application',
-            '--release-label',
-            'emr-6.9.0',
-            '--type',
-            f'\"{EMRAppType.tostring(self.app_type)}\"',
-            '--name',
-            self.app_name
-        ]
-        self.logger.info('Creating new EMR-serverless application')
-        std_out = self.exec_ctxt.platform.cli.run_sys_cmd(cmd_args)
-        json_value = json.loads(std_out)
-        self.id = json_value['applicationId']
-        self._update_status()
-        return self
-
-    def _wait_for_states(self, *states):
-        self.logger.debug('Waiting for application to reach state: %s', states)
-        while self.state not in states:
-            time.sleep(5)
-            self._update_status()
-        self.logger.debug('Done waiting for application to reach state: %s', states)
-
-    def app_is_terminated(self):
-        return self.state in [EMRAppState.TERMINATED, EMRAppState.STOPPED]
-
-    def wait_for_app_ready(self):
-        if self.app_is_terminated():
-            raise RuntimeError(f'EMR Application {self.id} is not active. '
-                               f'Current state is {self.state}.')
-        self._wait_for_states(EMRAppState.STARTED, EMRAppState.STARTING, EMRAppState.CREATED)
-
-    def stop_app(self):
-        self.logger.info('Start stopping application %s. Current state %s', self.id, self.state)
-        if self.state not in [EMRAppState.STOPPING, EMRAppState.STOPPED, EMRAppState.TERMINATED]:
-            cmd_args = [
-                'aws', 'emr-serverless', 'stop-application', '--application-id', self.id
-            ]
-            std_out = self.exec_ctxt.platform.cli.run_sys_cmd(cmd_args)
-            self.logger.info('Application %s has stopped: %s', self.id, std_out)
-        else:
-            self.logger.info('Application %s was already stopped. Current state: %s', self.id, self.state)
-        self._wait_for_states(EMRAppState.TERMINATED, EMRAppState.STOPPED)
-
-    def delete_app(self):
-        # stop app first
-        self.stop_app()
-        # now delete the app
-        cmd_args = [
-            'aws', 'emr-serverless', 'delete-application', '--application-id', self.id
-        ]
-        std_out = self.exec_ctxt.platform.cli.run_sys_cmd(cmd_args)
-        self.logger.info('The EMR-serverless application was deleted. %s', std_out)
-
-    def terminate_app(self):
-        if self.outlive_submission:
-            self.logger.info('Skipping termination of Emr-serverless app %s. The app outlives the '
-                             'execution.',
-                             self.id)
-        else:
-            self.logger.info('Deleting the temporary app %s with current state %s', self.id, self.state)
-            self.delete_app()
-
-
-@dataclass
-class EmrServerlessRapidsJob(RapidsJob):
-    """
-    An implementation that uses EMR-Serverless to run the RAPIDS accelerator tool.
-    """
-    job_label = 'emrServerless'
-    job_state: EMRJobState = field(default=None, init=False)
-    emr_app: EMRServerlessApplication = field(default=None, init=False)
-
-    def is_finished(self):
-        if self.job_state is None:
-            return False
-        return self.job_state in [EMRJobState.FAILED, EMRJobState.SUCCESS, EMRJobState.CANCELLED]
-
-    def _init_fields(self):
-        super()._init_fields()
-        app_id = self.prop_container.get_value_silent('platformArgs', 'application-id')
-        self.emr_app = EMRServerlessApplication(id=app_id,
-                                                app_name=self.__generate_app_name(),
-                                                exec_ctxt=self.exec_ctxt)
-        self.emr_app.wait_for_app_ready()
-
-    def __generate_app_name(self) -> str:
-        return self.exec_ctxt.get_ctxt('execFullName')
-
-    def __get_role_arn(self):
-        return self.prop_container.get_value('platformArgs', 'execution-role-arn')
-
-    def __generate_job_name(self):
-        # use the same name as the output folder
-        return f'{self.emr_app.app_name}-{Utils.gen_random_string(4)}'
-
-    def __build_driver(self) -> str:
-        spark_job_configs = self.prop_container.get_value('sparkConfArgs', 'properties')
-        spark_params = ['--class', self.prop_container.get_jar_main_class()]
-        for conf_k, conf_val in spark_job_configs.items():
-            conf_arg = ['--conf', f'{conf_k}={conf_val}']
-            spark_params.extend(conf_arg)
-        submit_params = {
-            'entryPoint': self.prop_container.get_jar_file(),
-            'entryPointArguments': self._build_rapids_args(),
-            'sparkSubmitParameters': Utils.gen_joined_str(' ', spark_params)
-        }
-        res = {
-            'sparkSubmit': submit_params
-        }
-        return json.dumps(res)
-
-    def _build_submission_cmd(self):
-        cmd_args = ['aws',
-                    'emr-serverless',
-                    'start-job-run']
-        # add application_id
-        cmd_args.extend(['--application-id', self.emr_app.id])
-        # add job_role_arn
-        cmd_args.extend(['--execution-role-arn', self.__get_role_arn()])
-        # add job_name
-        cmd_args.extend(['--name', self.__generate_job_name()])
-        # add job_driver
-        cmd_args.extend(['--job-driver', f"\'{self.__build_driver()}\'"])
-        return cmd_args
-
-    def __get_job_details(self, job_id: str, app_id: str) -> dict:
-        cmd_args = [
-            'aws',
-            'emr-serverless',
-            'get-job-run',
-            '--application-id',
-            app_id,
-            '--job-run-id',
-            job_id]
-        std_out = self.exec_ctxt.platform.cli.run_sys_cmd(cmd_args)
-        return json.loads(std_out)
-
-    def _submit_job(self, cmd_args: list) -> str:
-        std_out = self.exec_ctxt.platform.cli.run_sys_cmd(cmd_args)
-        self.logger.debug('Output of job submission is %s', std_out)
-        # get the job_id
-        json_out = json.loads(std_out)
-        job_id = json_out['jobRunId']
-        self.logger.info('Submitted JOB ID is %s', job_id)
-        # wait while the job is still running
-        while not self.is_finished():
-            time.sleep(10)
-            job_details = self.__get_job_details(job_id, self.emr_app.id)
-            self.job_state = EMRJobState.fromstring(job_details['jobRun']['state'])
-            self.logger.info('Job state: %s', self.job_state)
-        self.logger.info('Done waiting for the job to finish.')
-        # Cleanup the emr application if necessary
-        self.emr_app.terminate_app()
-        return std_out
+from dataclasses import dataclass
+from spark_rapids_pytools.rapids.rapids_job import RapidsLocalJob
 
 
 @dataclass
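
The removed `EmrServerlessRapidsJob._submit_job` above amounts to a submit-then-poll protocol against the `aws emr-serverless` CLI. For readers of this patch, a rough shell sketch of the polling half follows; the application ID and job-run ID are placeholders taken from the `start-job-run` output:

```
APP_ID=<application-id>
JOB_RUN_ID=<job-run-id>
while true; do
  STATE=$(aws emr-serverless get-job-run \
            --application-id "$APP_ID" --job-run-id "$JOB_RUN_ID" \
            --query 'jobRun.state' --output text)
  echo "Job state: $STATE"
  case "$STATE" in
    SUCCESS|FAILED|CANCELLED) break ;;   # terminal states checked by is_finished()
  esac
  sleep 10
done
```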
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py b/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
index 0bbd5d53b..d823e223f 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
@@ -81,9 +81,6 @@ def get_platform_name(self) -> str:
     def get_footer_message(self) -> str:
         return 'To support acceleration with T4 GPUs, please use these worker node instance types.'
 
-    def create_submission_job(self, job_prop, ctxt) -> Any:
-        pass
-
     def create_saving_estimator(self,
                                 source_cluster: ClusterGetAccessor,
                                 reshaped_cluster: ClusterGetAccessor):
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/sp_types.py b/user_tools/src/spark_rapids_pytools/cloud_api/sp_types.py
index 0cf92a3cd..d8c28b88a 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/sp_types.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/sp_types.py
@@ -850,9 +850,6 @@ def create_saving_estimator(self,
                                 reshaped_cluster: ClusterGetAccessor):
         raise NotImplementedError
 
-    def create_submission_job(self, job_prop, ctxt) -> Any:
-        raise NotImplementedError
-
     def create_local_submission_job(self, job_prop, ctxt) -> Any:
         raise NotImplementedError
 
diff --git a/user_tools/src/spark_rapids_pytools/rapids/qualification.py b/user_tools/src/spark_rapids_pytools/rapids/qualification.py
index 9a7098355..3a37ded5e 100644
--- a/user_tools/src/spark_rapids_pytools/rapids/qualification.py
+++ b/user_tools/src/spark_rapids_pytools/rapids/qualification.py
@@ -22,7 +22,7 @@
 import pandas as pd
 from tabulate import tabulate
 
-from spark_rapids_pytools.cloud_api.sp_types import EnumeratedType, ClusterReshape, DeployMode
+from spark_rapids_pytools.cloud_api.sp_types import EnumeratedType, ClusterReshape
 from spark_rapids_pytools.common.sys_storage import FSUtil
 from spark_rapids_pytools.common.utilities import Utils, TemplateGenerator
 from spark_rapids_pytools.pricing.price_provider import SavingsEstimator
@@ -367,11 +367,8 @@ def _prepare_job_arguments(self):
         }
         job_properties = RapidsJobPropContainer(prop_arg=job_properties_json,
                                                 file_load=False)
-        if self.ctxt.get_deploy_mode() == DeployMode.REMOTE_CLUSTER:
-            job_obj = self.ctxt.platform.create_spark_submission_job(job_prop=job_properties,
-                                                                     ctxt=self.ctxt)
-        else:
-            job_obj = self.ctxt.platform.create_submission_job(job_prop=job_properties, ctxt=self.ctxt)
+        job_obj = self.ctxt.platform.create_spark_submission_job(job_prop=job_properties,
+                                                                 ctxt=self.ctxt)
         job_obj.run_job()
 
     def __get_recommended_apps(self, all_rows, selected_cols=None) -> pd.DataFrame:
diff --git a/user_tools/src/spark_rapids_pytools/wrappers/emr_wrapper.py b/user_tools/src/spark_rapids_pytools/wrappers/emr_wrapper.py
index f5c82c588..7513cd8b5 100644
--- a/user_tools/src/spark_rapids_pytools/wrappers/emr_wrapper.py
+++ b/user_tools/src/spark_rapids_pytools/wrappers/emr_wrapper.py
@@ -20,7 +20,7 @@
 from spark_rapids_pytools.rapids.bootstrap import Bootstrap
 from spark_rapids_pytools.rapids.diagnostic import Diagnostic
 from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, \
-    Qualification, QualGpuClusterReshapeType
+    QualGpuClusterReshapeType
 
 
 class CliEmrLocalMode:  # pylint: disable=too-few-public-methods
@@ -204,113 +204,12 @@ def diagnostic(cluster: str,
         diag_tool.launch()
 
 
-class CliEmrServerlessMode:  # pylint: disable=too-few-public-methods
-    """
-    A wrapper that runs the logic as an EMR-serverless application.
-    """
-
-    @staticmethod
-    def qualification(cpu_cluster: str,
-                      remote_folder: str,
-                      job_arn: str,
-                      eventlogs: str = None,
-                      profile: str = None,
-                      app_id: str = None,
-                      gpu_cluster: str = None,
-                      local_folder: str = None,
-                      tools_jar: str = None,
-                      filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
-                      verbose: bool = False,
-                      **rapids_options) -> None:
-        """
-        The Qualification tool analyzes Spark events generated from CPU-based Spark applications to
-        help quantify the expected acceleration and cost savings of migrating a Spark application
-        or query to GPU. The wrapper submits a Spark EMR-Serverless job to run the RAPIDS accelerator.
-
-        :param cpu_cluster: The EMR-cluster on which the Spark applications were executed. The argument
-               can be an EMR-cluster or a valid path to the cluster's properties file (json format)
-               generated by the AWS CLI.
-        :param eventlogs: Event log filenames or S3 storage directories
-               containing event logs (comma separated). If missing, the wrapper reads the Spark
-               property `spark.eventLog.dir` defined in `cpu_cluster`. This property should be included
-               in the output of `emr describe-cluster`. Note that the wrapper will raise an exception
-               if the property is not set.
-        :param remote_folder: The S3 folder where the output is archived.
-        :param job_arn: The execution role ARN for the job run.
-        :param app_id: The ID of the EMR-serverless application on which to run the job.
-                If missing, the wrapper creates an EMR-serverless application that gets deleted at
-                the end of the execution. Note that creating an EMR-serverless application takes a few
-                minutes.
-        :param gpu_cluster: The EMR-cluster to which the Spark applications are planned to be migrated.
-                The argument can be an EMR-cluster or a valid path to the cluster's properties file
-                (json format) generated by the AWS CLI. If missing, the wrapper maps the EC2 machine
-                instances of the original cluster into EC2 instances that support GPU acceleration.
-        :param local_folder: Local work-directory path to store the output and to be used as root
-                directory for temporary folders/files. If the argument is NONE,
-               the default value is the env variable RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY if any;
-               or the current working directory.
-        :param profile: A named AWS profile that you can specify to get the settings/credentials of the AWS account.
-        :param tools_jar: Path to a bundled jar including RAPIDS tool. The path is a local filesystem,
-                or remote S3 url. If missing, the wrapper downloads the latest rapids-4-spark-tools_*.jar
-                from maven repo.
-        :param filter_apps: filtering criteria of the applications listed in the final STDOUT table
-                is one of the following (NONE, SPEEDUPS, SAVINGS). Default is "SAVINGS".
-                Note that this filter does not affect the CSV report.
-                "NONE" means no filter applied. "SPEEDUPS" lists all the apps that are either
-                'Recommended', or 'Strongly Recommended' based on speedups. "SAVINGS"
-                lists all the apps that have positive estimated GPU savings except for the apps that
-                are "Not Applicable".
-        :param verbose: True or False to enable verbosity to the wrapper script.
-        :param rapids_options: A list of valid Qualification tool options.
-            Note that the wrapper ignores ["output-directory", "platform"] flags, and it does not support
-            multiple "spark-property" arguments.
-            For more details on Qualification tool options, please visit
-            https://nvidia.github.io/spark-rapids/docs/spark-qualification-tool.html#qualification-tool-options
-        """
-        if verbose:
-            # when debug is set to true set it in the environment.
-            ToolLogging.enable_debug_mode()
-        wrapper_qual_options = {
-            'platformOpts': {
-                'profile': profile,
-                'deployMode': DeployMode.SERVERLESS,
-            },
-            'migrationClustersProps': {
-                'cpuCluster': cpu_cluster,
-                'gpuCluster': gpu_cluster
-            },
-            'jobSubmissionProps': {
-                'remoteFolder': remote_folder,
-                'platformArgs': {
-                    'application-id': app_id,
-                    'execution-role-arn': job_arn,
-                }
-            },
-            'eventlogs': eventlogs,
-            'filterApps': filter_apps,
-            'toolsJar': tools_jar,
-        }
-        Qualification(platform_type=CloudPlatform.EMR,
-                      cluster=None,
-                      output_folder=local_folder,
-                      wrapper_options=wrapper_qual_options,
-                      rapids_options=rapids_options).launch()
-
-
 class EMRWrapper:  # pylint: disable=too-few-public-methods
     """
     A wrapper script to run RAPIDS Accelerator tools (Qualification, Profiling, and Bootstrap) on Amazon EMR.
-    :param mode: The deployment mode of the tool command from RAPIDS Accelerator for Apache Spark. Accepted options
-                are <local|serverless>. "local" means that the tool runs locally on the development machine.
-                "serverless" means that the wrapper submits a new job to an Amazon EMR-Serverless application.
     """
 
-    def __init__(self, mode: str = DeployMode.pretty_print(DeployMode.LOCAL)):
-        self.mode = mode
-        if DeployMode.fromstring(self.mode) == DeployMode.SERVERLESS:
-            self.qualification = CliEmrServerlessMode.qualification
-            self.bootstrap = CliEmrLocalMode.bootstrap
-        else:
-            self.qualification = CliEmrLocalMode.qualification
-            self.bootstrap = CliEmrLocalMode.bootstrap
-            self.diagnostic = CliEmrLocalMode.diagnostic
+    def __init__(self):
+        self.qualification = CliEmrLocalMode.qualification
+        self.bootstrap = CliEmrLocalMode.bootstrap
+        self.diagnostic = CliEmrLocalMode.diagnostic

From f9a7dd24d476943fc9fb1d534907247b082da410 Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Mon, 10 Jul 2023 17:57:25 -0500
Subject: [PATCH 05/14] Remove cluster-mode execution wrapper (#424)

* Remove cluster-mode execution wrapper

Fixes #422

- Remove the CLI remote cluster support
- Update the documentation

---------

Signed-off-by: Ahmed Hussein (amahussein) <a@ahussein.me>
---
 user_tools/docs/index.md                      | 161 ++++++------------
 user_tools/docs/user-tools-dataproc.md        |  14 --
 .../cloud_api/databricks_aws.py               |   3 -
 .../cloud_api/databricks_azure.py             |   3 -
 .../cloud_api/dataproc.py                     |   5 +-
 .../cloud_api/dataproc_job.py                 |  10 +-
 .../src/spark_rapids_pytools/cloud_api/emr.py |   3 -
 .../spark_rapids_pytools/cloud_api/onprem.py  |   3 -
 .../cloud_api/sp_types.py                     |   7 +-
 .../rapids/qualification.py                   |  96 -----------
 .../spark_rapids_pytools/rapids/rapids_job.py |  27 ---
 .../wrappers/dataproc_wrapper.py              |  30 ++--
 .../wrappers/onprem_wrapper.py                |   2 -
 13 files changed, 69 insertions(+), 295 deletions(-)

diff --git a/user_tools/docs/index.md b/user_tools/docs/index.md
index ce4cad5f1..0fd4ad4c0 100644
--- a/user_tools/docs/index.md
+++ b/user_tools/docs/index.md
@@ -55,19 +55,12 @@ number of worker nodes, Yarn configuration, Spark version and error logs etc.
 Note that the command would require `SSH` access on the cluster nodes to collect information, otherwise an error would
 be raised.
 
-## Deploy modes
+## Deployment
 
-The wrapper has the following deployment modes:
-
-1. **local**: The wrapper runs a Java application on the local dev machine. This mode requires the following:
+The wrapper runs a Java application on the local dev machine. This requires the following:
    1. The cloud SDK is installed and configured correctly to access the resources such as event logs.
    2. Java 1.8+ development environment
    3. access to maven repositories to download dependencies such as Spark 3.x.
-2. **cluster**: The wrapper submits a job to an existing cluster. Note that the _Bootstrap_ tool can
-   only run in this deployment mode.  This deployment mode requires the following:
-   1. The cloud SDK is installed and configured correctly to access the resources such as event logs.
-   2. An active cluster is running before issuing the wrapper command.
-   3. The active cluster is running Spark 3.x
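
A quick way to sanity-check the local requirements listed above before running any command (this is only a sketch; the exact cloud CLI depends on the platform you target):

```
java -version     # expect 1.8 or later
aws --version     # or the equivalent version check of the cloud SDK for your platform
```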
 
 
 ## Supported platforms
@@ -75,102 +68,60 @@ The wrapper has the following deployment modes:
 The following table summarizes the commands supported for each cloud platform:
 
 ```
-+------------------+---------------+-------------+-----------------------------------------+----------+
-| platform         | command       | deploy mode |              CLI                        |  version |
-+==================+===============+=============+=========================================+==========+
-| EMR              | qualification | local       | spark_rapids_user_tools \               |  23.02+  |
-|                  |               |             |   emr qualification [ARGS]              |          |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | profiling     | local       |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | bootstrap     | local       | spark_rapids_user_tools \               |  23.02+  |
-|                  |               |             |   emr bootstrap [ARGS]                  |          |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     | spark_rapids_user_tools \               |  23.02+  |
-|                  |               |             |   emr bootstrap [ARGS]                  |          |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | diagnostic    | local       |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     | spark_rapids_user_tools \               |  23.06+  |
-|                  |               |             |   emr diagnostic [ARGS]                 |          |
-+------------------+---------------+-------------+-----------------------------------------+----------+
-| Dataproc         | qualification | local       | spark_rapids_user_tools \               | 23.02.1+ |
-|                  |               |             |   dataproc qualification [ARGS]         |          |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     | spark_rapids_user_tools \               |  23.04+  |
-|                  |               |             |   dataproc qualification [ARGS]         |          |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | profiling     | local       | spark_rapids_user_tools \               | 23.02.1+ |
-|                  |               |             |   dataproc profiling [ARGS]             |          |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | bootstrap     | local       | spark_rapids_user_tools \               | 23.02.1+ |
-|                  |               |             |   dataproc bootstrap [ARGS]             |          |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | diagnostic    | local       |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     | spark_rapids_user_tools \               |  23.06+  |
-|                  |               |             |   dataproc diagnostic [ARGS]            |          |
-+------------------+---------------+-------------+-----------------------------------------+----------+
-| Databricks_AWS   | qualification | local       | spark_rapids_user_tools \               |  23.04+  |
-|                  |               |             |   databricks-aws qualification [ARGS]   |          |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | profiling     | local       | spark_rapids_user_tools \               |  23.08+  |
-|                  |               |             |   databricks-aws profiling [ARGS]       |          |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | bootstrap     | local       |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | diagnostic    | local       |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-+------------------+---------------+-------------+-----------------------------------------+----------+
-| Databricks_Azure | qualification | local       | spark_rapids_user_tools \               |  23.06+  |
-|                  |               |             |   databricks-azure qualification [ARGS] |          |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | profiling     | local       |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | bootstrap     | local       |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | diagnostic    | local       |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-+------------------+---------------+-------------+-----------------------------------------+----------+
-| OnPrem           | qualification | local       | spark_rapids_user_tools \               |  23.06+  |
-|                  |               |             |   onprem qualification [ARGS]           |          |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | profiling     | local       |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | bootstrap     | local       |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-|                  +---------------+-------------+-----------------------------------------+----------+
-|                  | diagnostic    | local       |           unsupported                   |    N/A   |
-|                  |               +-------------+-----------------------------------------+----------+
-|                  |               | cluster     |           unsupported                   |    N/A   |
-+------------------+---------------+-------------+-----------------------------------------+----------+
++------------------+---------------+-----------------------------------------+----------+
+| platform         | command       |              CLI                        |  version |
++==================+===============+=========================================+==========+
+| EMR              | qualification | spark_rapids_user_tools \               |  23.02+  |
+|                  |               |   emr qualification [ARGS]              |          |
+|                  +---------------+-----------------------------------------+----------+
+|                  | profiling     |               N/A                       |    TBD   |
+|                  +---------------+-----------------------------------------+----------+
+|                  | bootstrap     | spark_rapids_user_tools \               |  23.02+  |
+|                  |               |   emr bootstrap [ARGS]                  |          |
+|                  +---------------+-----------------------------------------+----------+
+|                  | diagnostic    | spark_rapids_user_tools \               |  23.06+  |
+|                  |               |   emr diagnostic [ARGS]                 |          |
++------------------+---------------+-----------------------------------------+----------+
+| Dataproc         | qualification | spark_rapids_user_tools \               | 23.02.1+ |
+|                  |               |   dataproc qualification [ARGS]         |          |
+|                  +---------------+-----------------------------------------+----------+
+|                  | profiling     | spark_rapids_user_tools \               | 23.02.1+ |
+|                  |               |   dataproc profiling [ARGS]             |          |
+|                  +---------------+-----------------------------------------+----------+
+|                  | bootstrap     | spark_rapids_user_tools \               | 23.02.1+ |
+|                  |               |   dataproc bootstrap [ARGS]             |          |
+|                  +---------------+-----------------------------------------+----------+
+|                  | diagnostic    | spark_rapids_user_tools \               |  23.06+  |
+|                  |               |   dataproc diagnostic [ARGS]            |          |
++------------------+---------------+-----------------------------------------+----------+
+| Databricks_AWS   | qualification | spark_rapids_user_tools \               |  23.04+  |
+|                  |               |   databricks-aws qualification [ARGS]   |          |
+|                  +---------------+-----------------------------------------+----------+
+|                  | profiling     | spark_rapids_user_tools \               |  23.08+  |
+|                  |               |   databricks-aws profiling [ARGS]       |          |
+|                  +---------------+-----------------------------------------+----------+
+|                  | bootstrap     |               N/A                       |    TBD   |
+|                  +---------------+-----------------------------------------+----------+
+|                  | diagnostic    |               N/A                       |    TBD   |
++------------------+---------------+-----------------------------------------+----------+
+| Databricks_Azure | qualification | spark_rapids_user_tools \               |  23.06+  |
+|                  |               |   databricks-azure qualification [ARGS] |          |
+|                  +---------------+-----------------------------------------+----------+
+|                  | profiling     |               N/A                       |    TBD   |
+|                  +---------------+-----------------------------------------+----------+
+|                  | bootstrap     |               N/A                       |    TBD   |
+|                  +---------------+-----------------------------------------+----------+
+|                  | diagnostic    |               N/A                       |    TBD   |
++------------------+---------------+-----------------------------------------+----------+
+| OnPrem           | qualification | spark_rapids_user_tools \               |  23.06+  |
+|                  |               |   onprem qualification [ARGS]           |          |
+|                  +---------------+-----------------------------------------+----------+
+|                  | profiling     |               N/A                       |    TBD   |
+|                  +---------------+-----------------------------------------+----------+
+|                  | bootstrap     |               N/A                       |    TBD   |
+|                  +---------------+-----------------------------------------+----------+
+|                  | diagnostic    |               N/A                       |    TBD   |
++------------------+---------------+-----------------------------------------+----------+
 ```
 
 Please visit the following guides for details on how to use the wrapper CLI on each of the following
diff --git a/user_tools/docs/user-tools-dataproc.md b/user_tools/docs/user-tools-dataproc.md
index bab4ed819..f5a6d66a3 100644
--- a/user_tools/docs/user-tools-dataproc.md
+++ b/user_tools/docs/user-tools-dataproc.md
@@ -176,20 +176,6 @@ Spark RAPIDS with your applications.
 A GSheet template with instructions can be found at here: [link](https://docs.google.com/spreadsheets/d/1CslQHTwxHEDTlAP4lcrOzbSrmucvn8z4iFlJo6EAhxs/edit#gid=1607726286).  
 Make a copy of the GSheet template and then follow the instructions listed in the `Instructions` tab.
 
-### Cluster deployment
-
-This deployment option allows running the RAPIDS tools on an active Dataproc cluster that is
-running a Spark-3.x+.  
-This option offers the scalability in handling large pool of event logs.
-
-The Cluster-deployment mode is similar to the [Local-deployment](#local-deployment) except that the CLI
-takes a new argument `execution_cluster` which is the name of the Dataproc cluster on which the tool is
-intended to run.
-
-```
-spark_rapids_user_tools dataproc qualification --execution_cluster <CLUSTER_NAME> [options]
-spark_rapids_user_tools dataproc qualification --help
-```
 
 ## Profiling command
 
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/databricks_aws.py b/user_tools/src/spark_rapids_pytools/cloud_api/databricks_aws.py
index 6bd4591ae..febead026 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/databricks_aws.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/databricks_aws.py
@@ -86,9 +86,6 @@ def create_local_submission_job(self, job_prop, ctxt) -> Any:
     def validate_job_submission_args(self, submission_args: dict) -> dict:
         pass
 
-    def create_spark_submission_job(self, job_prop, ctxt) -> Any:
-        raise NotImplementedError
-
 
 @dataclass
 class DBAWSCMDDriver(CMDDriverBase):
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/databricks_azure.py b/user_tools/src/spark_rapids_pytools/cloud_api/databricks_azure.py
index 18fa0fafe..e574f9923 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/databricks_azure.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/databricks_azure.py
@@ -87,9 +87,6 @@ def create_local_submission_job(self, job_prop, ctxt) -> Any:
     def validate_job_submission_args(self, submission_args: dict) -> dict:
         pass
 
-    def create_spark_submission_job(self, job_prop, ctxt) -> Any:
-        raise NotImplementedError
-
     def get_supported_gpus(self) -> dict:
         gpus_from_configs = self.configs.get_value('gpuConfigs', 'user-tools', 'supportedGpuInstances')
         gpu_scopes = {}
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
index b6c4f5124..3bc5ee370 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
@@ -19,7 +19,7 @@
 from dataclasses import dataclass, field
 from typing import Any, List
 
-from spark_rapids_pytools.cloud_api.dataproc_job import DataprocLocalRapidsJob, DataprocSubmitSparkRapidsJob
+from spark_rapids_pytools.cloud_api.dataproc_job import DataprocLocalRapidsJob
 from spark_rapids_pytools.cloud_api.gstorage import GStorageDriver
 from spark_rapids_pytools.cloud_api.sp_types import PlatformBase, CMDDriverBase, CloudPlatform, \
     ClusterBase, ClusterNode, SysInfo, GpuHWInfo, SparkNodeType, ClusterState, GpuDevice, \
@@ -115,9 +115,6 @@ def create_saving_estimator(self,
     def create_local_submission_job(self, job_prop, ctxt) -> Any:
         return DataprocLocalRapidsJob(prop_container=job_prop, exec_ctxt=ctxt)
 
-    def create_spark_submission_job(self, job_prop, ctxt) -> Any:
-        return DataprocSubmitSparkRapidsJob(prop_container=job_prop, exec_ctxt=ctxt)
-
     def validate_job_submission_args(self, submission_args: dict) -> dict:
         pass
 
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_job.py b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_job.py
index 7093c08cf..36ee0f760 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_job.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_job.py
@@ -16,7 +16,7 @@
 
 from dataclasses import dataclass
 
-from spark_rapids_pytools.rapids.rapids_job import RapidsLocalJob, RapidsSubmitSparkJob
+from spark_rapids_pytools.rapids.rapids_job import RapidsLocalJob
 
 
 @dataclass
@@ -25,11 +25,3 @@ class DataprocLocalRapidsJob(RapidsLocalJob):
     Implementation of a RAPIDS job that runs on a local machine.
     """
     job_label = 'dataprocLocal'
-
-
-@dataclass
-class DataprocSubmitSparkRapidsJob(RapidsSubmitSparkJob):
-    """
-    Implementation of a RAPIDS job that runs on a remote .
-    """
-    job_label = 'dataprocRemoteSparkJobSubmission'
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/emr.py b/user_tools/src/spark_rapids_pytools/cloud_api/emr.py
index 1c37519a9..4650317ba 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/emr.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/emr.py
@@ -110,9 +110,6 @@ def create_saving_estimator(self,
     def create_local_submission_job(self, job_prop, ctxt) -> Any:
         return EmrLocalRapidsJob(prop_container=job_prop, exec_ctxt=ctxt)
 
-    def create_spark_submission_job(self, job_prop, ctxt) -> Any:
-        raise NotImplementedError
-
 
 @dataclass
 class EMRCMDDriver(CMDDriverBase):
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py b/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
index d823e223f..1c92881bf 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
@@ -99,9 +99,6 @@ def create_saving_estimator(self,
                                                       source_cluster=source_cluster)
         return saving_estimator
 
-    def create_spark_submission_job(self, job_prop, ctxt) -> Any:
-        pass
-
     def set_offline_cluster(self, cluster_args: dict = None):
         pass
 
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/sp_types.py b/user_tools/src/spark_rapids_pytools/cloud_api/sp_types.py
index d8c28b88a..07e8c0246 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/sp_types.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/sp_types.py
@@ -64,15 +64,13 @@ def pretty_print(cls, value):
 
 class DeployMode(EnumeratedType):
     """List of tools deployment methods"""
-    # The rapids job runs by submitting a spinning serverless job
-    SERVERLESS = 'serverless'
     # The rapids job is running on local node
     LOCAL = 'local'
     # The rapids job is submitted on a remote cluster
     REMOTE_CLUSTER = 'remote'
 
     def requires_remote_storage(self) -> bool:
-        return self.value in [self.SERVERLESS, self.REMOTE_CLUSTER]
+        return self.value in [self.REMOTE_CLUSTER]
 
 
 class GpuDevice(EnumeratedType):
@@ -853,9 +851,6 @@ def create_saving_estimator(self,
     def create_local_submission_job(self, job_prop, ctxt) -> Any:
         raise NotImplementedError
 
-    def create_spark_submission_job(self, job_prop, ctxt) -> Any:
-        raise NotImplementedError
-
     def load_platform_configs(self):
         config_file_name = f'{CloudPlatform.tostring(self.type_id).lower()}-configs.json'
         config_path = Utils.resource_path(config_file_name)
diff --git a/user_tools/src/spark_rapids_pytools/rapids/qualification.py b/user_tools/src/spark_rapids_pytools/rapids/qualification.py
index 3a37ded5e..cccb14d8e 100644
--- a/user_tools/src/spark_rapids_pytools/rapids/qualification.py
+++ b/user_tools/src/spark_rapids_pytools/rapids/qualification.py
@@ -26,7 +26,6 @@
 from spark_rapids_pytools.common.sys_storage import FSUtil
 from spark_rapids_pytools.common.utilities import Utils, TemplateGenerator
 from spark_rapids_pytools.pricing.price_provider import SavingsEstimator
-from spark_rapids_pytools.rapids.rapids_job import RapidsJobPropContainer
 from spark_rapids_pytools.rapids.rapids_tool import RapidsJarTool
 
 
@@ -303,74 +302,6 @@ def _process_custom_args(self):
         # This is noise to dump everything
         # self.logger.debug('%s custom arguments = %s', self.pretty_name(), self.ctxt.props['wrapperCtx'])
 
-    def _process_job_submission_args(self):
-        job_args = {}
-        submission_args = self.wrapper_options.get('jobSubmissionProps')
-        # get the root remote folder and make sure it exists
-        job_args.update(self._set_remote_folder_for_submission(self.requires_remote_folder()))
-        platform_args = submission_args.get('platformArgs')
-        if platform_args is not None:
-            processed_platform_args = self.ctxt.platform.validate_job_submission_args(platform_args)
-            job_args['platformArgs'] = processed_platform_args
-        self.ctxt.update_job_args(job_args)
-
-    def _copy_dependencies_to_remote(self):
-        self.logger.info('Preparing remote dependency folder')
-        remote_work_dir = self.ctxt.get_remote('workDir')
-        local_folder = self.ctxt.get_local('outputFolder')
-        cp_res = self.ctxt.platform.storage.upload_resource(local_folder, remote_work_dir)
-        self.logger.debug('Executed command of copying %s', cp_res)
-
-    def _prepare_job_arguments(self):
-        job_args = self.ctxt.get_ctxt('jobArgs')
-        remote_folder = job_args.get('outputDirectory')
-        if remote_folder is None:
-            # for dataproc we can get the tmp gs storage
-            self.logger.info('The remote directory to archive the job results is not set')
-        else:
-            # check the remote_folder exists
-            if not self.ctxt.platform.storage.resource_exists(remote_folder):
-                raise RuntimeError(f'Remote folder [{remote_folder}] does not exist.')
-        # now we can create the job object
-        # Todo: For dataproc, this can be autogenerated from cluster name
-        rapids_arg_list = self._init_rapids_arg_list()
-        ctxt_rapids_args = self.ctxt.get_ctxt('rapidsArgs')
-        jar_file_name = ctxt_rapids_args.get('jarFileName')
-        rapids_opts = ctxt_rapids_args.get('rapidsOpts')
-        if rapids_opts is not None:
-            rapids_arg_list.extend(rapids_opts)
-        # add the eventlogs at the end of all the tool options
-        rapids_arg_list.extend(self.ctxt.get_ctxt('eventLogs'))
-        class_name = self.ctxt.get_value('sparkRapids', 'mainClass')
-        remote_jar = FSUtil.build_url_from_parts(self.ctxt.get_remote('depFolder'), jar_file_name)
-        rapids_arg_obj = {
-            'jarFile': remote_jar,
-            'jarArgs': rapids_arg_list,
-            'className': class_name
-        }
-        # EMR specific things
-        platform_args = job_args.get('platformArgs')
-        spark_conf_args = {
-            'properties': {
-                'spark.executor.cores': '4',
-                'spark.executor.memory': '20g',
-                'spark.driver.cores': '4',
-                'spark.driver.memory': '8g',
-                'spark.executor.instances': '1'
-            }
-        }
-        job_properties_json = {
-            'outputDirectory': remote_folder,
-            'rapidsArgs': rapids_arg_obj,
-            'sparkConfArgs': spark_conf_args,
-            'platformArgs': platform_args
-        }
-        job_properties = RapidsJobPropContainer(prop_arg=job_properties_json,
-                                                file_load=False)
-        job_obj = self.ctxt.platform.create_spark_submission_job(job_prop=job_properties,
-                                                                 ctxt=self.ctxt)
-        job_obj.run_job()
-
     def __get_recommended_apps(self, all_rows, selected_cols=None) -> pd.DataFrame:
         speed_up_col = self.ctxt.get_value('toolOutput', 'csv', 'summaryReport',
                                            'recommendations', 'speedUp', 'columnName')
@@ -738,20 +669,6 @@ def _write_summary(self):
         if wrapper_out_content is not None:
             print(Utils.gen_multiline_str(wrapper_out_content))
 
-    def _archive_results(self):
-        archive_enabled = self.ctxt.get_ctxt('archiveToRemote')
-        if archive_enabled:
-            # we should only archive when the remote_folder is being set
-            remote_work_dir = self.ctxt.get_remote('workDir')
-            if remote_work_dir and self._rapids_jar_tool_has_output():
-                local_folder = self.ctxt.get_output_folder()
-                # TODO make sure it worth issuing the command
-                rapids_subfolder = self.ctxt.get_value_silent('toolOutput', 'subFolder')
-                exclude_folder = rapids_subfolder
-                self.ctxt.platform.storage.upload_resource(local_folder,
-                                                           remote_work_dir,
-                                                           exclude_pattern=exclude_folder)
-
     def _init_rapids_arg_list(self) -> List[str]:
         # TODO: Make sure we add this argument only for jar versions 23.02+
         return ['--platform', self.ctxt.platform.get_platform_name().replace('_', '-')]
@@ -790,19 +707,6 @@ def _generate_section_lines(self, sec_conf: dict) -> List[str]:
         return super()._generate_section_content(sec_conf)
 
 
-@dataclass
-class QualificationAsRemote(Qualification):
-    """
-    Qualification tool running on Remote cluster development.
-    """
-    description: str = 'This is the Remote Spark Qualification implementation'
-
-    def _handle_non_running_exec_cluster(self, err_msg: str) -> None:
-        # For remote cluster mode, the execution cluster must be running
-        raise RuntimeError(f'Exception verifying remote cluster: {err_msg}. \n\t'
-                           'Make sure the execution cluster passed to the CLI is currently active')
-
-
 @dataclass
 class QualificationAsLocal(Qualification):
     """
diff --git a/user_tools/src/spark_rapids_pytools/rapids/rapids_job.py b/user_tools/src/spark_rapids_pytools/rapids/rapids_job.py
index 703605d8e..b4566cdfa 100644
--- a/user_tools/src/spark_rapids_pytools/rapids/rapids_job.py
+++ b/user_tools/src/spark_rapids_pytools/rapids/rapids_job.py
@@ -18,7 +18,6 @@
 from logging import Logger
 from typing import List
 
-from spark_rapids_pytools.cloud_api.sp_types import ClusterGetAccessor
 from spark_rapids_pytools.common.prop_manager import JSONPropertiesContainer
 from spark_rapids_pytools.common.utilities import ToolLogging, Utils
 from spark_rapids_pytools.rapids.tool_ctxt import ToolContext
@@ -152,29 +151,3 @@ def _submit_job(self, cmd_args: list) -> str:
         out_std = self.exec_ctxt.platform.cli.run_sys_cmd(cmd=cmd_args,
                                                           env_vars=env_args)
         return out_std
-
-
-@dataclass
-class RapidsSubmitSparkJob(RapidsJob):
-    """
-    Class to submit a spark job to remote Cluster
-    """
-
-    def _submit_job(self, cmd_args: list) -> str:
-        env_args = self.prop_container.get_value_silent('platformArgs', 'envArgs')
-        out_std = self.exec_ctxt.platform.cli.run_sys_cmd(cmd=cmd_args,
-                                                          env_vars=env_args)
-        return out_std
-
-    def _build_submission_cmd(self) -> List[str]:
-        submit_args = {
-            'jarArgs': self._build_rapids_args(),
-            'platformSparkJobArgs': {
-                'jars': self.prop_container.get_jar_file(),
-                'class': self.prop_container.get_jar_main_class()
-            },
-        }
-        exec_cluster: ClusterGetAccessor = self.exec_ctxt.get_ctxt('execCluster')
-        cluster_name = exec_cluster.get_name()
-        return self.exec_ctxt.platform.cli.get_submit_spark_job_cmd_for_cluster(cluster_name,
-                                                                                submit_args)
diff --git a/user_tools/src/spark_rapids_pytools/wrappers/dataproc_wrapper.py b/user_tools/src/spark_rapids_pytools/wrappers/dataproc_wrapper.py
index 1b641c878..f49d172ec 100644
--- a/user_tools/src/spark_rapids_pytools/wrappers/dataproc_wrapper.py
+++ b/user_tools/src/spark_rapids_pytools/wrappers/dataproc_wrapper.py
@@ -20,8 +20,7 @@
 from spark_rapids_pytools.rapids.bootstrap import Bootstrap
 from spark_rapids_pytools.rapids.diagnostic import Diagnostic
 from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
-from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType, \
-    QualificationAsRemote
+from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType
 
 
 class CliDataprocLocalMode:  # pylint: disable=too-few-public-methods
@@ -31,7 +30,6 @@ class CliDataprocLocalMode:  # pylint: disable=too-few-public-methods
 
     @staticmethod
     def qualification(cpu_cluster: str,
-                      execution_cluster: str = None,
                       eventlogs: str = None,
                       local_folder: str = None,
                       remote_folder: str = None,
@@ -52,8 +50,7 @@ def qualification(cpu_cluster: str,
         :param cpu_cluster: The Dataproc-cluster on which the Spark applications were executed. The argument
                 can be a Dataproc-cluster or a valid path to the cluster's properties file (json format)
                 generated by the gcloud-CLI.
-        :param execution_cluster: the cluster on which the Rapids tool will be running.
-        :param  eventlogs: Event log filenames or S3 storage directories
+        :param  eventlogs: Event log filenames or GCS storage directories
                 containing event logs (comma separated). If missing, the wrapper Reads the Spark's
                 property `spark.eventLog.dir` defined in `cpu_cluster`. This property should be included
                 in the output of `gcloud dataproc clusters describe`
@@ -63,14 +60,14 @@ def qualification(cpu_cluster: str,
                 ${local_folder}/qual-${EXEC_ID} where exec_id is an auto-generated unique identifier of the
                 execution. If the argument is NONE, the default value is the env variable
                 RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY if any; or the current working directory
-        :param remote_folder: An S3 folder where the output is uploaded at the end of execution.
+        :param remote_folder: A GCS folder where the output is uploaded at the end of execution.
                 If no value is provided, the output will be only available on local disk
         :param gpu_cluster: The Dataproc-cluster on which the Spark applications is planned to be migrated.
                 The argument can be a Dataproc-cluster or a valid path to the cluster's properties file
                 (json format) generated by the gcloud-CLI. If missing, the wrapper maps the dataproc machine
                 instances of the original cluster into dataproc instances that support GPU acceleration
         :param tools_jar: Path to a bundled jar including Rapids tool. The path is a local filesystem,
-                or remote S3 url. If missing, the wrapper downloads the latest rapids-4-spark-tools_*.jar
+                or remote GCS url. If missing, the wrapper downloads the latest rapids-4-spark-tools_*.jar
                 from maven repo
         :param credentials_file: The local path of JSON file that contains the application credentials.
                If missing, the wrapper looks for "GOOGLE_APPLICATION_CREDENTIALS" environment variable
@@ -102,7 +99,7 @@ def qualification(cpu_cluster: str,
         wrapper_qual_options = {
             'platformOpts': {
                 'credentialFile': credentials_file,
-                'deployMode': DeployMode.REMOTE_CLUSTER if execution_cluster else DeployMode.LOCAL,
+                'deployMode': DeployMode.LOCAL,
             },
             'migrationClustersProps': {
                 'cpuCluster': cpu_cluster,
@@ -119,18 +116,11 @@ def qualification(cpu_cluster: str,
             'toolsJar': tools_jar,
             'gpuClusterRecommendation': gpu_cluster_recommendation
         }
-        if execution_cluster:
-            tool_obj = QualificationAsRemote(platform_type=CloudPlatform.DATAPROC,
-                                             cluster=execution_cluster,
-                                             output_folder=local_folder,
-                                             wrapper_options=wrapper_qual_options,
-                                             rapids_options=rapids_options)
-        else:
-            tool_obj = QualificationAsLocal(platform_type=CloudPlatform.DATAPROC,
-                                            cluster=execution_cluster,
-                                            output_folder=local_folder,
-                                            wrapper_options=wrapper_qual_options,
-                                            rapids_options=rapids_options)
+
+        tool_obj = QualificationAsLocal(platform_type=CloudPlatform.DATAPROC,
+                                        output_folder=local_folder,
+                                        wrapper_options=wrapper_qual_options,
+                                        rapids_options=rapids_options)
         tool_obj.launch()
 
     @staticmethod
diff --git a/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py b/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
index c0ca8e8b6..046c7c568 100644
--- a/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
+++ b/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
@@ -28,7 +28,6 @@ class CliOnpremLocalMode:  # pylint: disable=too-few-public-methods
 
     @staticmethod
     def qualification(cpu_cluster: str = None,
-                      execution_cluster: str = None,
                       eventlogs: str = None,
                       local_folder: str = None,
                       remote_folder: str = None,
@@ -80,7 +79,6 @@ def qualification(cpu_cluster: str = None,
             'target_platform': target_platform
         }
         tool_obj = QualificationAsLocal(platform_type=CloudPlatform.ONPREM,
-                                        cluster=execution_cluster,
                                         output_folder=local_folder,
                                         wrapper_options=wrapper_qual_options,
                                         rapids_options=rapids_options)

From 8a67add9b7dfb3672cd7e128ef4f6c87adb5c9be Mon Sep 17 00:00:00 2001
From: Cindy Jiang <47068112+cindyyuanjiang@users.noreply.github.com>
Date: Tue, 11 Jul 2023 06:18:49 -0700
Subject: [PATCH 06/14] [TASK] Implement classes and methods to support user
 tools profiling command for Databricks Azure (#423)

* add user profiling command for databricks azure

---------

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>
---
 .../cloud_api/databricks_azure.py             |  5 +-
 .../wrappers/databricks_azure_wrapper.py      | 79 +++++++++++++++++++
 2 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/databricks_azure.py b/user_tools/src/spark_rapids_pytools/cloud_api/databricks_azure.py
index e574f9923..0c1824ab1 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/databricks_azure.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/databricks_azure.py
@@ -23,7 +23,7 @@
 from spark_rapids_pytools.cloud_api.azurestorage import AzureStorageDriver
 from spark_rapids_pytools.cloud_api.databricks_azure_job import DBAzureLocalRapidsJob
 from spark_rapids_pytools.cloud_api.sp_types import CloudPlatform, CMDDriverBase, ClusterBase, ClusterNode, \
-    PlatformBase, SysInfo, GpuHWInfo, ClusterState, SparkNodeType, ClusterGetAccessor, NodeHWInfo
+    PlatformBase, SysInfo, GpuHWInfo, ClusterState, SparkNodeType, ClusterGetAccessor, NodeHWInfo, GpuDevice
 from spark_rapids_pytools.common.prop_manager import JSONPropertiesContainer
 from spark_rapids_pytools.common.sys_storage import FSUtil
 from spark_rapids_pytools.common.utilities import Utils
@@ -215,8 +215,9 @@ def _pull_gpu_hw_info(self, cli=None) -> GpuHWInfo or None:
         if self.instance_type not in gpu_info:
             return None
         gpu_instance = gpu_info[self.instance_type]['GpuInfo']['GPUs'][0]
+        gpu_device = GpuDevice.fromstring(gpu_instance['Name'])
         return GpuHWInfo(num_gpus=gpu_instance['Count'],
-                         gpu_device=gpu_instance['Name'],
+                         gpu_device=gpu_device,
                          gpu_mem=gpu_instance['MemoryInfo']['SizeInMiB'])
 
 
diff --git a/user_tools/src/spark_rapids_pytools/wrappers/databricks_azure_wrapper.py b/user_tools/src/spark_rapids_pytools/wrappers/databricks_azure_wrapper.py
index 6455e2c1d..6aec99f49 100644
--- a/user_tools/src/spark_rapids_pytools/wrappers/databricks_azure_wrapper.py
+++ b/user_tools/src/spark_rapids_pytools/wrappers/databricks_azure_wrapper.py
@@ -17,6 +17,7 @@
 
 from spark_rapids_pytools.cloud_api.sp_types import DeployMode, CloudPlatform
 from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
 from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType
 
 
@@ -123,6 +124,83 @@ def qualification(cpu_cluster: str,
                              wrapper_options=wrapper_qual_options,
                              rapids_options=rapids_options).launch()
 
+    @staticmethod
+    def profiling(gpu_cluster: str = None,
+                  worker_info: str = None,
+                  eventlogs: str = None,
+                  profile: str = None,
+                  local_folder: str = None,
+                  remote_folder: str = None,
+                  tools_jar: str = None,
+                  credentials_file: str = None,
+                  jvm_heap_size: int = 24,
+                  verbose: bool = False,
+                  **rapids_options) -> None:
+        """
+        The Profiling tool analyzes both CPU- and GPU-generated event logs and generates information
+        which can be used for debugging and profiling Apache Spark applications.
+        :param  gpu_cluster: The Databricks-cluster on which the Spark applications were executed. The argument
+                can be a Databricks-cluster or a valid path to the cluster's properties file (json format)
+                generated by the databricks-CLI. If missing, then the argument worker_info has to be provided.
+        :param  worker_info: A path pointing to a yaml file containing the system information of a
+                worker node. It is assumed that all workers are homogeneous.
+                If missing, the wrapper pulls the worker info from the "gpu_cluster".
+        :param  eventlogs: Event log filenames or ABFS (Azure Blob File System) storage directories
+                containing event logs (comma separated). If missing, the wrapper reads the Spark's
+                property `spark.eventLog.dir` defined in `gpu_cluster`. This property should be included
+                in the output of `databricks clusters get [--cluster-id CLUSTER_ID| --cluster-name CLUSTER_NAME]`.
+                Note that the wrapper will raise an exception if the property is not set.
+        :param profile: A named Databricks profile to get the settings/credentials of the Databricks CLI.
+        :param local_folder: Local work-directory path to store the output and to be used as root
+                directory for temporary folders/files. The final output will go into a subdirectory called
+                ${local_folder}/prof-${EXEC_ID} where exec_id is an auto-generated unique identifier of the
+                execution. If the argument is NONE, the default value is the env variable
+                RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY if any; or the current working directory.
+        :param remote_folder: An ABFS (Azure Blob File System) folder where the output is uploaded at the end
+                of execution. If no value is provided, the output will be only available on local disk.
+        :param tools_jar: Path to a bundled jar including Rapids tool. The path is a local filesystem,
+                or remote ABFS url. If missing, the wrapper downloads the latest rapids-4-spark-tools_*.jar
+                from maven repo.
+        :param credentials_file: The local path of JSON file that contains the application credentials.
+               If missing, the wrapper looks for "DATABRICKS_CONFIG_FILE" environment variable
+               to provide the location of a credential file. The default credentials file exists as
+               "~/.databrickscfg" on Unix, Linux, or macOS.
+        :param verbose: True or False to enable verbosity to the wrapper script.
+        :param jvm_heap_size: The maximum heap size of the JVM in gigabytes.
+        :param rapids_options: A list of valid Profiling tool options.
+                Note that the wrapper ignores ["output-directory", "worker-info"] flags, and it does not support
+                multiple "spark-property" arguments.
+                For more details on Profiling tool options, please visit
+                https://nvidia.github.io/spark-rapids/docs/spark-profiling-tool.html#profiling-tool-options
+        """
+        if verbose:
+            # when debug is set to true, set it in the environment.
+            ToolLogging.enable_debug_mode()
+        wrapper_prof_options = {
+            'platformOpts': {
+                # the databricks profile
+                'profile': profile,
+                'credentialFile': credentials_file,
+                'deployMode': DeployMode.LOCAL,
+            },
+            'migrationClustersProps': {
+                'gpuCluster': gpu_cluster
+            },
+            'jobSubmissionProps': {
+                'remoteFolder': remote_folder,
+                'platformArgs': {
+                    'jvmMaxHeapSize': jvm_heap_size
+                }
+            },
+            'eventlogs': eventlogs,
+            'toolsJar': tools_jar,
+            'autoTunerFileInput': worker_info
+        }
+        ProfilingAsLocal(platform_type=CloudPlatform.DATABRICKS_AZURE,
+                         output_folder=local_folder,
+                         wrapper_options=wrapper_prof_options,
+                         rapids_options=rapids_options).launch()
+
 
 class DBAzureWrapper:  # pylint: disable=too-few-public-methods
     """
@@ -131,3 +209,4 @@ class DBAzureWrapper:  # pylint: disable=too-few-public-methods
 
     def __init__(self):
         self.qualification = CliDBAzureLocalMode.qualification
+        self.profiling = CliDBAzureLocalMode.profiling
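
The new `profiling` entry point added above can also be driven programmatically. A hedged sketch; the cluster name, event-log URI, and output folders below are placeholders rather than values from this patch:

```python
from spark_rapids_pytools.wrappers.databricks_azure_wrapper import CliDBAzureLocalMode

# Runs the Profiling tool locally against ABFS event logs; equivalent to
# `spark_rapids_user_tools databricks-azure profiling [options]`.
CliDBAzureLocalMode.profiling(
    gpu_cluster='my-databricks-gpu-cluster',        # or pass worker_info instead
    eventlogs='abfss://LOGS_CONTAINER/eventlogs/',  # comma-separated ABFS URLs
    remote_folder='abfss://OUT_CONTAINER/wrapper_output',
    local_folder='./prof_output',
    jvm_heap_size=24,
)
```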

From 84a2cf0bcc81953e19be91eee6a8b1a32bc40d24 Mon Sep 17 00:00:00 2001
From: Partho Sarthi <psarthi@nvidia.com>
Date: Tue, 11 Jul 2023 11:26:35 -0700
Subject: [PATCH 07/14] Add platform argument in profiling tool for custom
 tuning based on the platform (#414)

* Add platform argument in profiling tool for custom tuning based on environment

---------

Signed-off-by: Partho Sarthi <psarthi@nvidia.com>
---
 core/docs/spark-profiling-tool.md             |  4 ++++
 .../rapids/tool/profiling/AutoTuner.scala     | 22 +++++++++++--------
 .../rapids/tool/profiling/ProfileArgs.scala   |  6 +++++
 .../rapids/tool/profiling/Profiler.scala      |  5 ++++-
 4 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/core/docs/spark-profiling-tool.md b/core/docs/spark-profiling-tool.md
index 519ca81ae..ffcb1f5da 100644
--- a/core/docs/spark-profiling-tool.md
+++ b/core/docs/spark-profiling-tool.md
@@ -626,6 +626,10 @@ Usage: java -cp rapids-4-spark-tools_2.12-<version>.jar:$SPARK_HOME/jars/*
   -p, --print-plans               Print the SQL plans to a file named
                                   'planDescriptions.log'.
                                   Default is false.
+      --platform  <arg>           Cluster platform where Spark GPU workloads were
+                                  executed. Options include onprem, dataproc, emr,
+                                  databricks.
+                                  Default is onprem.
   -s, --start-app-time  <arg>     Filter event logs whose application start
                                   occurred within the past specified time
                                   period. Valid time periods are
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala
index fe1ea34cc..271a9aee4 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala
@@ -328,7 +328,8 @@ class RecommendationEntry(val name: String,
  */
 class AutoTuner(
     val clusterProps: ClusterProperties,
-    val appInfoProvider: AppSummaryInfoBaseProvider)  extends Logging {
+    val appInfoProvider: AppSummaryInfoBaseProvider,
+    val platform: String)  extends Logging {
 
   import AutoTuner._
 
@@ -987,9 +988,10 @@ object AutoTuner extends Logging {
 
   private def handleException(
       ex: Exception,
-      appInfo: AppSummaryInfoBaseProvider): AutoTuner = {
+      appInfo: AppSummaryInfoBaseProvider,
+      platform: String): AutoTuner = {
     logError("Exception: " + ex.getStackTrace.mkString("Array(", ", ", ")"))
-    val tuning = new AutoTuner(new ClusterProperties(), appInfo)
+    val tuning = new AutoTuner(new ClusterProperties(), appInfo, platform)
     val msg = ex match {
       case cEx: ConstructorException => cEx.getContext
       case _ => if (ex.getCause != null) ex.getCause.toString else ex.toString
@@ -1033,25 +1035,27 @@ object AutoTuner extends Logging {
    */
   def buildAutoTunerFromProps(
       clusterProps: String,
-      singleAppProvider: AppSummaryInfoBaseProvider): AutoTuner = {
+      singleAppProvider: AppSummaryInfoBaseProvider,
+      platform: String = Profiler.DEFAULT_PLATFORM): AutoTuner = {
     try {
       val clusterPropsOpt = loadClusterPropertiesFromContent(clusterProps)
-      new AutoTuner(clusterPropsOpt.getOrElse(new ClusterProperties()), singleAppProvider)
+      new AutoTuner(clusterPropsOpt.getOrElse(new ClusterProperties()), singleAppProvider, platform)
     } catch {
       case e: Exception =>
-        handleException(e, singleAppProvider)
+        handleException(e, singleAppProvider, platform)
     }
   }
 
   def buildAutoTuner(
       filePath: String,
-      singleAppProvider: AppSummaryInfoBaseProvider): AutoTuner = {
+      singleAppProvider: AppSummaryInfoBaseProvider,
+      platform: String = Profiler.DEFAULT_PLATFORM): AutoTuner = {
     try {
       val clusterPropsOpt = loadClusterProps(filePath)
-      new AutoTuner(clusterPropsOpt.getOrElse(new ClusterProperties()), singleAppProvider)
+      new AutoTuner(clusterPropsOpt.getOrElse(new ClusterProperties()), singleAppProvider, platform)
     } catch {
       case e: Exception =>
-        handleException(e, singleAppProvider)
+        handleException(e, singleAppProvider, platform)
     }
   }
 
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileArgs.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileArgs.scala
index a942a8e8e..b4201ee2c 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileArgs.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileArgs.scala
@@ -63,6 +63,12 @@ Usage: java -cp rapids-4-spark-tools_2.12-<version>.jar:$SPARK_HOME/jars/*
     opt[Boolean](required = false,
       descr = "Print the SQL plans to a file named 'planDescriptions.log'." +
         " Default is false.")
+  val platform: ScallopOption[String] =
+    opt[String](required = false,
+      descr = "Cluster platform where Spark GPU workloads were executed. Options include " +
+        "onprem, dataproc, emr, databricks." +
+        " Default is onprem.",
+      default = Some(Profiler.DEFAULT_PLATFORM))
   val generateTimeline: ScallopOption[Boolean] =
     opt[Boolean](required = false,
       descr = "Write an SVG graph out for the full application timeline.")
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
index e4607207f..ca285eaf9 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
@@ -470,8 +470,9 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs) extends Logging
 
       if (useAutoTuner) {
         val workerInfoPath = appArgs.workerInfo.getOrElse(AutoTuner.DEFAULT_WORKER_INFO_PATH)
+        val platform = appArgs.platform.getOrElse(Profiler.DEFAULT_PLATFORM)
         val autoTuner: AutoTuner = AutoTuner.buildAutoTuner(workerInfoPath,
-          new SingleAppSummaryInfoProvider(app))
+          new SingleAppSummaryInfoProvider(app), platform)
         // the autotuner allows skipping some properties
         // e.g. getRecommendedProperties(Some(Seq("spark.executor.instances"))) skips the
         // recommendation related to executor instances.
@@ -489,6 +490,8 @@ object Profiler {
   val COMPARE_LOG_FILE_NAME_PREFIX = "rapids_4_spark_tools_compare"
   val COMBINED_LOG_FILE_NAME_PREFIX = "rapids_4_spark_tools_combined"
   val SUBDIR = "rapids_4_spark_profile"
+  val DEFAULT_PLATFORM = "onprem"
+
   def getAutoTunerResultsAsString(props: Seq[RecommendedPropertyResult],
       comments: Seq[RecommendedCommentResult]): String = {
     val propStr = if (props.nonEmpty) {
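
For context, the new `--platform` option is plumbed from the command line into the AutoTuner. A hedged Python sketch of invoking the profiling jar with the flag; the jar filename and main-class name are assumptions, and only the `--platform` option itself (default `onprem`) comes from this change:

```python
import os
import subprocess

jar = 'rapids-4-spark-tools_2.12-<version>.jar'                     # placeholder path
classpath = f"{jar}:{os.environ.get('SPARK_HOME', '')}/jars/*"      # '*' is expanded by the JVM
main_class = 'com.nvidia.spark.rapids.tool.profiling.ProfileMain'   # assumed main class

subprocess.run(
    ['java', '-cp', classpath, main_class,
     '--platform', 'dataproc',   # one of: onprem, dataproc, emr, databricks
     '/path/to/eventlogs'],
    check=True)
```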

From 3d43f1541643748634111a42636193c00a2b8f77 Mon Sep 17 00:00:00 2001
From: Cindy Jiang <47068112+cindyyuanjiang@users.noreply.github.com>
Date: Tue, 11 Jul 2023 16:59:15 -0700
Subject: [PATCH 08/14] [DOC] Add documentation for running user tools
 profiling command for Databricks Azure (#430)

* add documentation for user profiling tool for databricks azure

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>

* updated index.md doc

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>

* addressed review feedback

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>

---------

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>
---
 user_tools/docs/index.md                      |   5 +-
 user_tools/docs/user-tools-databricks-aws.md  |   4 +-
 .../docs/user-tools-databricks-azure.md       | 155 +++++++++++++++++-
 3 files changed, 155 insertions(+), 9 deletions(-)

diff --git a/user_tools/docs/index.md b/user_tools/docs/index.md
index 0fd4ad4c0..d37d95ead 100644
--- a/user_tools/docs/index.md
+++ b/user_tools/docs/index.md
@@ -97,7 +97,7 @@ The following table summarizes the commands supported for each cloud platform:
 | Databricks_AWS   | qualification | spark_rapids_user_tools \               |  23.04+  |
 |                  |               |   databricks-aws qualification [ARGS]   |          |
 |                  +---------------+-----------------------------------------+----------+
-|                  | profiling     | spark_rapids_user_tools \               |  23.08+  |
+|                  | profiling     | spark_rapids_user_tools \               | 23.06.1+ |
 |                  |               |   databricks-aws profiling [ARGS]       |          |
 |                  +---------------+-----------------------------------------+----------+
 |                  | bootstrap     |               N/A                       |    TBD   |
@@ -107,7 +107,8 @@ The following table summarizes the commands supported for each cloud platform:
 | Databricks_Azure | qualification | spark_rapids_user_tools \               |  23.06+  |
 |                  |               |   databricks-azure qualification [ARGS] |          |
 |                  +---------------+-----------------------------------------+----------+
-|                  | profiling     |               N/A                       |    TBD   |
+|                  | profiling     | spark_rapids_user_tools \               | 23.06.2+ |
+|                  |               |   databricks-azure profiling [ARGS]     |          |
 |                  +---------------+-----------------------------------------+----------+
 |                  | bootstrap     |               N/A                       |    TBD   |
 |                  +---------------+-----------------------------------------+----------+
diff --git a/user_tools/docs/user-tools-databricks-aws.md b/user_tools/docs/user-tools-databricks-aws.md
index ffd63d212..d415fee34 100644
--- a/user_tools/docs/user-tools-databricks-aws.md
+++ b/user_tools/docs/user-tools-databricks-aws.md
@@ -95,8 +95,7 @@ A typical workflow to successfully run the `qualification` command in local mode
       commands can access the S3 resources `LOGS_BUCKET`.
    4. installs `spark_rapids_user_tools`
 3. If the results of the wrapper need to be stored on S3, then another S3 uri is required `REMOTE_FOLDER=s3://OUT_BUCKET/`
-4. User defines the Databricks-cluster on which the Spark application were running. Note that the cluster does not have to be
-   active; but it has to be visible by the Databricks CLI (i.e., can run `databricks clusters get --cluster-name`).
+4. User defines the Databricks-cluster on which the Spark applications were running. Note that the cluster does not have to be active, but it has to be visible to the Databricks CLI (i.e., can run `databricks clusters get --cluster-name`).
 5. The following script runs qualification by passing an S3 remote directory to store the output:
 
    ```
@@ -171,6 +170,7 @@ spark_rapids_user_tools databricks-aws profiling [options]
 spark_rapids_user_tools databricks-aws profiling -- --help
 ```
 
+The local deployment runs on the local development machine. It requires:
 1. Installing and configuring the Databricks and AWS CLI (`databricks` and `aws` commands)
 2. Java 1.8+ development environment
 3. Internet access to download JAR dependencies from mvn: `spark-*.jar`, `hadoop-aws-*.jar`, and `aws-java-sdk-bundle*.jar`
diff --git a/user_tools/docs/user-tools-databricks-azure.md b/user_tools/docs/user-tools-databricks-azure.md
index ad11bb59b..6d6df86a0 100644
--- a/user_tools/docs/user-tools-databricks-azure.md
+++ b/user_tools/docs/user-tools-databricks-azure.md
@@ -92,12 +92,11 @@ A typical workflow to successfully run the `qualification` command in local mode
    2. installs Databricks CLI and configures the profile and the credentials to make sure the
       access credentials are stored in the file `~/.databrickscfg` on Unix, Linux, or macOS,
       or in another file defined by environment variable `DATABRICKS_CONFIG_FILE`.
-   3. installs Azure CLI and configures the profile and the credentials to make sure the Azure CLI
-      commands can access the ABFS resources `LOGS_CONTAINER` and `OUT_CONTAINER`.
+   3. installs Azure CLI and configures the credentials to make sure the Azure CLI
+      commands can access the ABFS resources (i.e., the storage container `LOGS_CONTAINER` that stores the event logs).
    4. installs `spark_rapids_user_tools`
 3. If the results of the wrapper need to be stored on ABFS, then another ABFS uri is required `REMOTE_FOLDER=abfss://OUT_BUCKET/`
-4. User defines the Databricks-cluster on which the Spark application were running. Note that the cluster does not have to be
-   active; but it has to be visible by the Databricks CLI (i.e., can run `databricks clusters get --cluster-name`).
+4. User defines the Databricks-cluster on which the Spark applications were running. Note that the cluster does not have to be active, but it has to be visible to the Databricks CLI (i.e., can run `databricks clusters get --cluster-name`).
 5. The following script runs qualification by passing an ABFS remote directory to store the output:
 
    ```
@@ -105,7 +104,7 @@ A typical workflow to successfully run the `qualification` command in local mode
    export RAPIDS_USER_TOOLS_CACHE_FOLDER=my_cache_folder
    export EVENTLOGS=abfss://LOGS_CONTAINER/eventlogs/
    export CLUSTER_NAME=my-databricks-cpu-cluster
-   export REMOTE_FOLDER=abfss://OUT_CONTAINER/wrapper_output
+   export REMOTE_FOLDER=abfss://OUT_BUCKET/wrapper_output
    
    spark_rapids_user_tools databricks-azure qualification \
       --eventlogs $EVENTLOGS \
@@ -162,3 +161,149 @@ The command creates a directory with UUID that contains the following:
         ├── rapids_4_spark_qualification_output_stages.csv
         └── ui
     ```
+
+## Profiling command
+
+### Local deployment
+
+```
+spark_rapids_user_tools databricks-azure profiling [options]
+spark_rapids_user_tools databricks-azure profiling -- --help
+```
+
+The local deployment runs on the local development machine. It requires:
+1. Installing and configuring the Databricks and Azure CLI
+2. Java 1.8+ development environment
+3. Internet access to download JAR dependencies from mvn: `spark-*.jar`, and `hadoop-azure-*.jar`
+4. Dependencies are cached on the local disk to reduce the overhead of the download.
+
+#### Command options
+
+| Option               | Description                                                                                                                                                                                                                                                            | Default                                                                                                                                                                                                                                         | Required |
+|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
+| **gpu_cluster**      | The Databricks-cluster on which the Apache Spark applications were executed. Accepted values are a Databricks-cluster name, or a valid path to the cluster properties file (json format) generated by the Databricks CLI command `databricks clusters get --cluster-name` | If missing, then the argument `worker_info` has to be provided.                                                                                                                                                                                 |     N    |
+| **worker_info**      | A path pointing to a yaml file containing the system information of a worker node. It is assumed that all workers are homogenous. The format of the file is described in the following section.                                                                        | None                                                                                                                                                                                                                                            |     N    |
+| **eventlogs**        | A comma seperated list of ABFS urls pointing to event logs or ABFS directory                                                                                                                                                                                           | Reads the Spark's property `spark.eventLog.dir` defined in `gpu_cluster`. This property should be included in the output of `databricks clusters get --cluster-name`. Note that the wrapper will raise an exception if the property is not set. |     N    |
+| **remote_folder**    | The ABFS folder where the output of the wrapper's output is copied. If missing, the output will be available only on local disk                                                                                                                                        | N/A                                                                                                                                                                                                                                             |     N    |
+| **local_folder**     | Local work-directory path to store the output and to be used as root directory for temporary folders/files. The final output will go into a subdirectory named `prof-${EXEC_ID}` where `exec_id` is an auto-generated unique identifier of the execution.              | If the argument is NONE, the default value is the env variable `RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY` if any; or the current working directory.                                                                                                   |     N    |
+| **profile**          | A named Databricks profile that you can specify to get the settings/credentials of the Databricks account                                                                                                                                                              | "DEFAULT"                                                                                                                                                                                                                                       |     N    |
+| **jvm_heap_size**    | The maximum heap size of the JVM in gigabytes                                                                                                                                                                                                                          | 24                                                                                                                                                                                                                                              |     N    |
+| **tools_jar**        | Path to a bundled jar including RAPIDS tool. The path is a local filesystem, or remote ABFS url                                                                                                                                                                        | Downloads the latest `rapids-4-spark-tools_*.jar` from mvn repo                                                                                                                                                                                 |     N    |
+| **credentials_file** | The local path of JSON file that contains the application credentials                                                                                                                                                                                                  | If missing, loads the env variable `DATABRICKS_CONFIG_FILE` if any. Otherwise, it uses the default path `~/.databrickscfg` on Unix, Linux, or macOS                                                                                             |     N    |
+| **verbose**          | True or False to enable verbosity to the wrapper script                                                                                                                                                                                                                | False if `RAPIDS_USER_TOOLS_LOG_DEBUG` is not set                                                                                                                                                                                               |     N    |
+| **rapids_options**** | A list of valid [Profiling tool options](../../core/docs/spark-profiling-tool.md#qualification-tool-options). Note that (`output-directory`, `auto-tuner`, `combined`) flags are ignored                                                                               | N/A                                                                                                                                                                                                                                             |     N    |
+
+If the argument `gpu_cluster` is not provided to the CLI, then a valid path to a yaml file must be
+provided through the `worker_info` argument.
+The `worker_info` is a yaml file that contains the HW description of the workers. It must contain
+the following properties:
+- `system.numCores`: number of cores of a single worker node
+- `system.memory`: RAM size in MiB of a single node
+- `system.numWorkers`: number of workers
+- `gpu.name`: the accelerator installed on the worker node
+- `gpu.count`: number of accelerators on each worker node
+- `gpu.memory`: memory size of the accelerator in MiB (e.g., 16 GB for an Nvidia-T4)
+- `softwareProperties`: Spark default-configurations of the target cluster
+
+An example of valid `worker_info.yaml`:
+
+  ```
+  system:
+    numCores: 32
+    memory: 212992MiB
+    numWorkers: 5
+  gpu:
+    memory: 15109MiB
+    count: 4
+    name: T4
+  softwareProperties:
+    spark.driver.maxResultSize: 7680m
+    spark.driver.memory: 15360m
+    spark.executor.cores: '8'
+    spark.executor.instances: '2'
+    spark.executor.memory: 47222m
+    spark.executorEnv.OPENBLAS_NUM_THREADS: '1'
+    spark.scheduler.mode: FAIR
+    spark.sql.cbo.enabled: 'true'
+    spark.ui.port: '0'
+    spark.yarn.am.memory: 640m
+  ```
+
+#### Use case scenario
+
+A typical workflow to successfully run the `profiling` command in local mode is described as follows:
+
+1. Store the Apache Spark event logs in an ABFS folder.
+2. The user sets up their development machine (the CLI configuration is sketched after this list):
+   1. configures Java
+   2. installs Databricks CLI and configures the profile and the credentials to make sure the
+      access credentials are stored in the file `~/.databrickscfg` on Unix, Linux, or macOS,
+      or in another file defined by environment variable `DATABRICKS_CONFIG_FILE`.
+   3. installs Azure CLI and configures the credentials to make sure the Azure CLI
+      commands can access the ABFS resources (e.g., the storage container `LOGS_CONTAINER` which stores the event logs).
+   4. installs `spark_rapids_user_tools`
+3. If the results of the wrapper need to be stored on ABFS, then another ABFS URI is required, e.g. `REMOTE_FOLDER=abfss://OUT_BUCKET/`
+4. Depending on the accessibility of the cluster properties, the user chooses one of the two cases below (_"Case-A"_ and _"Case-B"_) to trigger the CLI.
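+
+A minimal sketch of the CLI configuration from step 2 is shown below. Using a personal access token via `databricks configure --token` is only one option; any authentication method supported by the Databricks and Azure CLIs works:
+
+```
+# Databricks CLI: writes host/token to ~/.databrickscfg (or $DATABRICKS_CONFIG_FILE)
+databricks configure --token
+# Azure CLI: authenticate so that ABFS storage (e.g. LOGS_CONTAINER) is reachable
+az login
+```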
+
+For each successful execution, the wrapper generates a new directory in the format of
+`prof_<YYYYmmddHHmmss>_<0x%08X>`. The directory contains `profiling_summary.log` in addition to
+the actual folder of the RAPIDS Profiling tool. The directory is mirrored to the ABFS folder if the
+argument `--remote_folder` is a valid ABFS path.
+
+   ```
+    ./prof_<YYYYmmddHHmmss>_<0x%08X>/profiling_summary.log
+    ./prof_<YYYYmmddHHmmss>_<0x%08X>/rapids_4_spark_profile/
+   ```
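+
+If `--remote_folder` was provided, one possible way to pull the mirrored results back to the local disk is sketched below. It assumes `azcopy` is installed and already authorized; the storage account name and the container/path are placeholders that map to the `abfss://` URI used above:
+
+```
+# copy the mirrored output directory from ADLS Gen2 back to the current directory (sketch)
+azcopy copy \
+  "https://<account>.dfs.core.windows.net/OUT_BUCKET/wrapper_output/prof_<YYYYmmddHHmmss>_<0x%08X>" \
+  . --recursive
+```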
+
+**Case-A: A gpu-cluster property file is accessible:**
+
+The cluster properties file is accessible if one of the following conditions applies:
+
+1. The cluster is listed by the `databricks clusters get --cluster-name $CLUSTER_NAME` command. In this case, the CLI will be triggered by providing
+   `--gpu_cluster $CLUSTER_NAME`:
+
+   ```
+   # run the command using the GPU cluster name
+   export RAPIDS_USER_TOOLS_CACHE_FOLDER=my_cache_folder
+   export EVENTLOGS=abfss://LOGS_CONTAINER/eventlogs/
+   export CLUSTER_NAME=my-databricks-gpu-cluster
+   export REMOTE_FOLDER=abfss://OUT_BUCKET/wrapper_output
+
+   spark_rapids_user_tools databricks-azure profiling \
+      --eventlogs $EVENTLOGS \
+      --gpu_cluster $CLUSTER_NAME \
+      --remote_folder $REMOTE_FOLDER
+   ```
+2. The cluster properties file is accessible on local disk or a valid ABFS path.
+
+   ```
+   $> export CLUSTER_PROPS_FILE=cluster-props.json
+   $> databricks clusters get --cluster-name $CLUSTER_NAME > $CLUSTER_PROPS_FILE
+   ```
+   Trigger the CLI by providing the path to the properties file: `--gpu_cluster $CLUSTER_PROPS_FILE`
+
+   ```
+   $> spark_rapids_user_tools databricks-azure profiling \
+        --eventlogs $EVENTLOGS \
+        --gpu_cluster $CLUSTER_PROPS_FILE \
+        --remote_folder $REMOTE_FOLDER
+   ```
+
+**Case-B: GPU cluster information is missing:**
+
+In this scenario, the user can write a simple yaml file describing the shape of the worker nodes.  
+This case is relevant to the following situations:
+1. Users who might want to experiment with different configurations before deciding on the final
+   cluster shape. 
+2. Users who have no access to the properties of the cluster.
+
+The CLI is triggered by providing the location where the yaml file is stored: `--worker_info $WORKER_INFO_PATH`
+
+```
+# First, create a yaml file as described in previous section
+$> export WORKER_INFO_PATH=worker-info.yaml
+# Run the profiling cmd
+$> spark_rapids_user_tools databricks-azure profiling \
+        --eventlogs $EVENTLOGS \
+        --worker_info $WORKER_INFO_PATH \
+        --remote_folder $REMOTE_FOLDER
+```

From 780b6e996ff9188d4ed30d448dc399047afe85c6 Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Wed, 12 Jul 2023 10:15:41 -0500
Subject: [PATCH 09/14] update diagnostic unit-tests to support var directory
 (#429)

Signed-off-by: Ahmed Hussein (amahussein) <a@ahussein.me>

Fixes #428

- diagnostics unit-tests fail on environments that store tmp-files in
  /var/*
- update the pattern to match either `/tmp/**` or `/var/**`
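
A quick sanity check of the relaxed pattern, using `grep -E` as a stand-in for the Python `re.match` call (the sample archive paths are made up):

```
# both roots are now accepted by the pattern
echo "Archive '/tmp/abc/diag_20230712.tar' is successfully created." \
  | grep -E "Archive '/(tmp|var)/.*/diag_.*\.tar' is successfully created\."
echo "Archive '/var/folders/xy/diag_20230712.tar' is successfully created." \
  | grep -E "Archive '/(tmp|var)/.*/diag_.*\.tar' is successfully created\."
```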
---
 user_tools/tests/test_diagnostic.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/user_tools/tests/test_diagnostic.py b/user_tools/tests/test_diagnostic.py
index 3cd32e600..2db3c1017 100644
--- a/user_tools/tests/test_diagnostic.py
+++ b/user_tools/tests/test_diagnostic.py
@@ -67,7 +67,7 @@ def test_info_collect(self, build_mock, cloud, capsys):
             assert len(build_mock.call_args_list) == 12
 
         _, stderr = capsys.readouterr()
-        assert re.match(r".*Archive '/tmp/.*/diag_.*\.tar' is successfully created\..*", stderr, re.DOTALL)
+        assert re.match(r".*Archive '/(tmp|var)/.*/diag_.*\.tar' is successfully created\..*", stderr, re.DOTALL)
 
     @patch('spark_rapids_pytools.common.utilities.SysCmd.build')
     def test_thread_num(self, build_mock, cloud, capsys):
@@ -91,7 +91,7 @@ def test_thread_num(self, build_mock, cloud, capsys):
         _, stderr = capsys.readouterr()
 
         assert 'Set thread number as: 7' in stderr
-        assert re.match(r".*Archive '/tmp/.*/diag_.*\.tar' is successfully created\..*", stderr, re.DOTALL)
+        assert re.match(r".*Archive '/(tmp|var)/.*/diag_.*\.tar' is successfully created\..*", stderr, re.DOTALL)
 
     @patch('spark_rapids_pytools.common.utilities.SysCmd.build')
     @pytest.mark.parametrize('thread_num', ['0', '11', '123'])
@@ -201,7 +201,7 @@ def test_auto_confirm(self, build_mock, cloud, user_input, capsys):
             assert len(build_mock.call_args_list) == 12
 
         _, stderr = capsys.readouterr()
-        assert re.match(r".*Archive '/tmp/.*/diag_.*\.tar' is successfully created\..*", stderr, re.DOTALL)
+        assert re.match(r".*Archive '/(tmp|var)/.*/diag_.*\.tar' is successfully created\..*", stderr, re.DOTALL)
 
     @patch('spark_rapids_pytools.common.utilities.SysCmd.build')
     @pytest.mark.parametrize('user_input', ['', 'n', 'no', 'NO', 'nO'])

From 7e05ba19e953eeb823e730d40fff6d626612bd45 Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Wed, 12 Jul 2023 12:21:01 -0500
Subject: [PATCH 10/14] Fix broken tables and code blocks in profiling-tool
 docs (#435)

Signed-off-by: Ahmed Hussein (amahussein) <a@ahussein.me>
---
 core/docs/spark-profiling-tool.md | 607 +++++++++++++++---------------
 1 file changed, 309 insertions(+), 298 deletions(-)

diff --git a/core/docs/spark-profiling-tool.md b/core/docs/spark-profiling-tool.md
index ffcb1f5da..cd422de05 100644
--- a/core/docs/spark-profiling-tool.md
+++ b/core/docs/spark-profiling-tool.md
@@ -3,6 +3,7 @@ layout: page
 title: Profiling Tool
 nav_order: 9
 ---
+
 # Profiling Tool
 
 The Profiling tool analyzes both CPU or GPU generated event logs and generates information 
@@ -17,6 +18,7 @@ configurations based on the worker's information (see [Auto-Tuner support](#auto
 ## How to use the Profiling tool
 
 ### Prerequisites
+
 - Java 8 or above, Spark 3.0.1+ jars
 - Spark event log(s) from Spark 2.0 or above version. Supports both rolled and compressed event logs 
   with `.lz4`, `.lzf`, `.snappy` and `.zstd` suffixes as well as 
@@ -30,19 +32,23 @@ or can be found in the location specified by `spark.eventLog.dir`. See the
 more information.
 
 ### Step 1a: Download the tools jar
+
 - Download the latest RAPIDS Accelerator for Apache Spark tools jar from [Maven repository](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark-tools_2.12/)
 
 If you want to compile the jar, please refer to the instructions [here](./spark-qualification-tool.md#How-to-compile-the-tools-jar).
 
 ### Step 1b: Download the Apache Spark 3 distribution
+
 The Profiling tool requires the Spark 3.x jars to be able to run but does not need an Apache Spark run time.
 If you do not already have Spark 3.x installed, 
 you can download the Spark distribution to any machine and include the jars in the classpath.
 - [Download Apache Spark 3.x](http://spark.apache.org/downloads.html)
 
 ### Step 2 How to run the Profiling tool
+
 The profiling tool parses the Spark CPU or GPU event log(s) and creates an output report.
 If necessary, extract the Spark distribution into a local directory.  To run the tool, please note the following:
+
 - Either set `SPARK_HOME` to point to that local directory or add it to the
 classpath `java -cp toolsJar:pathToSparkJars/*:...` when you run the Profiling tool.
 - Acceptable input event log paths are files or directories containing spark events logs
@@ -51,6 +57,7 @@ in the local filesystem, HDFS, S3 or mixed.
 the java heap size using `-Xmx` option.  For instance, to specify 30 GB heap size `java -Xmx30g`.
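+
+Putting these pieces together, a minimal sketch of a full invocation is shown below; the tools jar version, the Spark jars location, and the event log path are placeholders to adapt:
+
+```
+java -Xmx30g \
+  -cp rapids-4-spark-tools_2.12-<version>.jar:$SPARK_HOME/jars/* \
+  com.nvidia.spark.rapids.tool.profiling.ProfileMain \
+  /path/to/eventlog1
+```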
 
 There are 3 modes of operation for the Profiling tool:
+
  1. Collection Mode: 
     Collection mode is the default mode when no other options are specified it simply collects information
     on each application individually and outputs a file per application
@@ -95,6 +102,7 @@ There are 3 modes of operation for the Profiling tool:
 Run `--help` for more information.
 
 ## Understanding Profiling tool detailed output and examples
+
 The default output location is the current directory. 
 The output location can be changed using the `--output-directory` option.
 The output goes into a sub-directory named `rapids_4_spark_profile/` inside that output location.
@@ -114,13 +122,15 @@ Optionally if the `--csv` option is specified then it creates a csv file for eac
 corresponding sub-directory.
 
 Additional notes:
+
 - There is a 100 characters limit for each output column. If the result of the column exceeds this limit, it is suffixed with
 `...` for that column.
 - ResourceProfile ids are parsed for the event logs that are from Spark 3.1 or later.  A ResourceProfile allows the user
 to specify executor and task requirements for an RDD that will get applied during a stage.  This allows the user to change
 the resource requirements between stages.
 
-#### A. Collect Information or Compare Information(if more than 1 event logs are as input and option --compare is specified)
+#### A. Collect Information or Compare Information (if more than one event log is given as input and option --compare is specified)
+
 - Application information
 - Application log path mapping
 - Data Source information
@@ -141,304 +151,300 @@ the resource requirements between stages.
 
 For example, GPU run vs CPU run performance comparison or different runs with different parameters.
 
-We can input multiple Spark event logs and this tool can compare environments, executors, Rapids related Spark parameters,
+We can input multiple Spark event logs and this tool can compare environments, executors, and Rapids-related Spark parameters.
 
 - Compare the durations/versions/gpuMode on or off:
 
+- Application information:
 
-- Application information
-```
-Application Information:
-
-+--------+-----------+-----------------------+---------+-------------+-------------+--------+-----------+------------+-------------+
-|appIndex|appName    |appId                  |sparkUser|startTime    |endTime      |duration|durationStr|sparkVersion|pluginEnabled|
-+--------+-----------+-----------------------+---------+-------------+-------------+--------+-----------+------------+-------------+
-|1       |Spark shell|app-20210329165943-0103|user1    |1617037182848|1617037490515|307667  |5.1 min    |3.0.1       |false        |
-|2       |Spark shell|app-20210329170243-0018|user1    |1617037362324|1617038578035|1215711 |20 min     |3.0.1       |true         |
-+--------+-----------+-----------------------+---------+-------------+-------------+--------+-----------+------------+-------------+
-```
+  ```
+  +--------+-----------+-----------------------+---------+-------------+-------------+--------+-----------+------------+-------------+
+  |appIndex|appName    |appId                  |sparkUser|startTime    |endTime      |duration|durationStr|sparkVersion|pluginEnabled|
+  +--------+-----------+-----------------------+---------+-------------+-------------+--------+-----------+------------+-------------+
+  |1       |Spark shell|app-20210329165943-0103|user1    |1617037182848|1617037490515|307667  |5.1 min    |3.0.1       |false        |
+  |2       |Spark shell|app-20210329170243-0018|user1    |1617037362324|1617038578035|1215711 |20 min     |3.0.1       |true         |
+  +--------+-----------+-----------------------+---------+-------------+-------------+--------+-----------+------------+-------------+
+  ```
 
-- Data Source information
-The details of this output differ between using a Spark Data Source V1 and Data Source V2 reader. 
-The Data Source V2 truncates the schema, so if you see `...`, then
-the full schema is not available.
+- Data Source information:  
+  The details of this output differ between using a Spark Data Source V1 and Data Source V2 reader. 
+  The Data Source V2 truncates the schema, so if you see `...`, then
+  the full schema is not available.
 
-```
-Data Source Information:
-+--------+-----+-------+---------------------------------------------------------------------------------------------------------------------------+-----------------+---------------------------------------------------------------------------------------------+
-|appIndex|sqlID|format |location                                                                                                                   |pushedFilters    |schema                                                                                       |
-+--------+-----+-------+---------------------------------------------------------------------------------------------------------------------------+-----------------+---------------------------------------------------------------------------------------------+
-|1       |0    |Text   |InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/integration_tests/src/test/resources/trucks-comments.csv]|[]               |value:string                                                                                 |
-|1       |1    |csv    |Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/integration_tests/src/test/re...               |PushedFilters: []|_c0:string                                                                                   |
-|1       |2    |parquet|Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/lotscolumnsout]                                |PushedFilters: []|loan_id:bigint,monthly_reporting_period:string,servicer:string,interest_rate:double,curren...|
-|1       |3    |parquet|Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/lotscolumnsout]                                |PushedFilters: []|loan_id:bigint,monthly_reporting_period:string,servicer:string,interest_rate:double,curren...|
-|1       |4    |orc    |Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/logscolumsout.orc]                             |PushedFilters: []|loan_id:bigint,monthly_reporting_period:string,servicer:string,interest_rate:double,curren...|
-|1       |5    |orc    |Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/logscolumsout.orc]                             |PushedFilters: []|loan_id:bigint,monthly_reporting_period:string,servicer:string,interest_rate:double,curren...|
-|1       |6    |json   |Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/lotsofcolumnsout.json]                         |PushedFilters: []|adj_remaining_months_to_maturity:double,asset_recovery_costs:double,credit_enhancement_pro...|
-|1       |7    |json   |Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/lotsofcolumnsout.json]                         |PushedFilters: []|adj_remaining_months_to_maturity:double,asset_recovery_costs:double,credit_enhancement_pro...|
-|1       |8    |json   |Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/lotsofcolumnsout.json]                         |PushedFilters: []|adj_remaining_months_to_maturity:double,asset_recovery_costs:double,credit_enhancement_pro...|
-|1       |9    |JDBC   |unknown                                                                                                                    |unknown          |                                                                                             |
-+--------+-----+-------+---------------------------------------------------------------------------------------------------------------------------+-----------------+---------------------------------------------------------------------------------------------+
-```
+  ```
+  +--------+-----+-------+---------------------------------------------------------------------------------------------------------------------------+-----------------+---------------------------------------------------------------------------------------------+
+  |appIndex|sqlID|format |location                                                                                                                   |pushedFilters    |schema                                                                                       |
+  +--------+-----+-------+---------------------------------------------------------------------------------------------------------------------------+-----------------+---------------------------------------------------------------------------------------------+
+  |1       |0    |Text   |InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/integration_tests/src/test/resources/trucks-comments.csv]|[]               |value:string                                                                                 |
+  |1       |1    |csv    |Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/integration_tests/src/test/re...               |PushedFilters: []|_c0:string                                                                                   |
+  |1       |2    |parquet|Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/lotscolumnsout]                                |PushedFilters: []|loan_id:bigint,monthly_reporting_period:string,servicer:string,interest_rate:double,curren...|
+  |1       |3    |parquet|Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/lotscolumnsout]                                |PushedFilters: []|loan_id:bigint,monthly_reporting_period:string,servicer:string,interest_rate:double,curren...|
+  |1       |4    |orc    |Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/logscolumsout.orc]                             |PushedFilters: []|loan_id:bigint,monthly_reporting_period:string,servicer:string,interest_rate:double,curren...|
+  |1       |5    |orc    |Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/logscolumsout.orc]                             |PushedFilters: []|loan_id:bigint,monthly_reporting_period:string,servicer:string,interest_rate:double,curren...|
+  |1       |6    |json   |Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/lotsofcolumnsout.json]                         |PushedFilters: []|adj_remaining_months_to_maturity:double,asset_recovery_costs:double,credit_enhancement_pro...|
+  |1       |7    |json   |Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/lotsofcolumnsout.json]                         |PushedFilters: []|adj_remaining_months_to_maturity:double,asset_recovery_costs:double,credit_enhancement_pro...|
+  |1       |8    |json   |Location: InMemoryFileIndex[file:/home/user1/workspace/spark-rapids-another/lotsofcolumnsout.json]                         |PushedFilters: []|adj_remaining_months_to_maturity:double,asset_recovery_costs:double,credit_enhancement_pro...|
+  |1       |9    |JDBC   |unknown                                                                                                                    |unknown          |                                                                                             |
+  +--------+-----+-------+---------------------------------------------------------------------------------------------------------------------------+-----------------+---------------------------------------------------------------------------------------------+
+  ```
 
 - Executor information:
 
-```
-Executor Information:
-+--------+-----------------+------------+-------------+-----------+------------+-------------+--------------+------------------+---------------+-------+-------+
-|appIndex|resourceProfileId|numExecutors|executorCores|maxMem     |maxOnHeapMem|maxOffHeapMem|executorMemory|numGpusPerExecutor|executorOffHeap|taskCpu|taskGpu|
-+--------+-----------------+------------+-------------+-----------+------------+-------------+--------------+------------------+---------------+-------+-------+
-|1       |0                |1           |4            |11264537395|11264537395 |0            |20480         |1                 |0              |1      |0.0    |
-|1       |1                |2           |2            |3247335014 |3247335014  |0            |6144          |2                 |0              |2      |2.0    |
-+--------+-----------------+------------+-------------+-----------+------------+-------------+-------------+--------------+------------------+---------------+-------+-------+
-```
+  ```
+  +--------+-----------------+------------+-------------+-----------+------------+-------------+--------------+------------------+---------------+-------+-------+
+  |appIndex|resourceProfileId|numExecutors|executorCores|maxMem     |maxOnHeapMem|maxOffHeapMem|executorMemory|numGpusPerExecutor|executorOffHeap|taskCpu|taskGpu|
+  +--------+-----------------+------------+-------------+-----------+------------+-------------+--------------+------------------+---------------+-------+-------+
+  |1       |0                |1           |4            |11264537395|11264537395 |0            |20480         |1                 |0              |1      |0.0    |
+  |1       |1                |2           |2            |3247335014 |3247335014  |0            |6144          |2                 |0              |2      |2.0    |
+  +--------+-----------------+------------+-------------+-----------+------------+-------------+--------------+------------------+---------------+-------+-------+
+  ```
 
 - Matching SQL IDs Across Applications:
 
-```
-Matching SQL IDs Across Applications:
-+-----------------------+-----------------------+
-|app-20210329165943-0103|app-20210329170243-0018|
-+-----------------------+-----------------------+
-|0                      |0                      |
-|1                      |1                      |
-|2                      |2                      |
-|3                      |3                      |
-|4                      |4                      |
-+-----------------------+-----------------------+
-```
+  ```
+  +-----------------------+-----------------------+
+  |app-20210329165943-0103|app-20210329170243-0018|
+  +-----------------------+-----------------------+
+  |0                      |0                      |
+  |1                      |1                      |
+  |2                      |2                      |
+  |3                      |3                      |
+  |4                      |4                      |
+  +-----------------------+-----------------------+
+  ```
 
-There is one column per application. There is a row per SQL ID. The SQL IDs are matched
-primarily on the structure of the SQL query run, and then on the order in which they were
-run. Be aware that this is truly the structure of the query. Two queries that do similar
-things, but on different data are likely to match as the same.  An effort is made to
-also match between CPU plans and GPU plans so in most cases the same query run on the
-CPU and on the GPU will match.
+  There is one column per application. There is a row per SQL ID. The SQL IDs are matched
+  primarily on the structure of the SQL query run, and then on the order in which they were
+  run. Be aware that this is truly the structure of the query. Two queries that do similar
+  things, but on different data are likely to match as the same.  An effort is made to
+  also match between CPU plans and GPU plans so in most cases the same query run on the
+  CPU and on the GPU will match.
 
 - Matching Stage IDs Across Applications:
 
-```
-Matching Stage IDs Across Applications:
-+-----------------------+-----------------------+
-|app-20210329165943-0103|app-20210329170243-0018|
-+-----------------------+-----------------------+
-|31                     |31                     |
-|32                     |32                     |
-|33                     |33                     |
-|39                     |38                     |
-|40                     |40                     |
-|41                     |41                     |
-+-----------------------+-----------------------+
-```
+  ```
+  +-----------------------+-----------------------+
+  |app-20210329165943-0103|app-20210329170243-0018|
+  +-----------------------+-----------------------+
+  |31                     |31                     |
+  |32                     |32                     |
+  |33                     |33                     |
+  |39                     |38                     |
+  |40                     |40                     |
+  |41                     |41                     |
+  +-----------------------+-----------------------+
+  ```
 
-There is one column per application. There is a row per stage ID. If a SQL query matches
-between applications, see Matching SQL IDs Across Applications, then an attempt is made
-to match stages within that application to each other.  This has the same issues with
-stages when generating a dot graph.  This can be especially helpful when trying to compare
-large queries and Spark happened to assign the stage IDs slightly differently, or in some
-cases there are a different number of stages because of slight differences in the plan. This
-is a best effort, and it is not guaranteed to match up all stages in a plan.
+  There is one column per application. There is a row per stage ID. If a SQL query matches
+  between applications, see Matching SQL IDs Across Applications, then an attempt is made
+  to match stages within that application to each other.  This has the same issues with
+  stages when generating a dot graph.  This can be especially helpful when trying to compare
+  large queries and Spark happened to assign the stage IDs slightly differently, or in some
+  cases there are a different number of stages because of slight differences in the plan. This
+  is a best effort, and it is not guaranteed to match up all stages in a plan.
 
-- SQL to Stage Information (sorted by stage duration)
+- SQL to Stage Information (sorted by stage duration):
 
-Note that not all SQL nodes have a mapping to stage id so some nodes might be missing.
+  Note that not all SQL nodes have a mapping to stage id so some nodes might be missing.
 
-```
-SQL to Stage Information:
-+--------+-----+-----+-------+--------------+--------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-|appIndex|sqlID|jobID|stageId|stageAttemptId|Stage Duration|SQL Nodes(IDs)                                                                                                                                                     |
-+--------+-----+-----+-------+--------------+--------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-|1       |0    |1    |1      |0             |8174          |Exchange(9),WholeStageCodegen (1)(10),Scan(13)                                                                                                                     |
-|1       |0    |1    |2      |0             |8154          |Exchange(16),WholeStageCodegen (3)(17),Scan(20)                                                                                                                    |
-|1       |0    |1    |3      |0             |2148          |Exchange(2),HashAggregate(4),SortMergeJoin(6),WholeStageCodegen (5)(3),Sort(8),WholeStageCodegen (2)(7),Exchange(9),Sort(15),WholeStageCodegen (4)(14),Exchange(16)|
-|1       |0    |1    |4      |0             |126           |HashAggregate(1),WholeStageCodegen (6)(0),Exchange(2)                                                                                                              |
-+--------+-----+-----+-------+--------------+--------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-```
+  ```
+  +--------+-----+-----+-------+--------------+--------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+  |appIndex|sqlID|jobID|stageId|stageAttemptId|Stage Duration|SQL Nodes(IDs)                                                                                                                                                     |
+  +--------+-----+-----+-------+--------------+--------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+  |1       |0    |1    |1      |0             |8174          |Exchange(9),WholeStageCodegen (1)(10),Scan(13)                                                                                                                     |
+  |1       |0    |1    |2      |0             |8154          |Exchange(16),WholeStageCodegen (3)(17),Scan(20)                                                                                                                    |
+  |1       |0    |1    |3      |0             |2148          |Exchange(2),HashAggregate(4),SortMergeJoin(6),WholeStageCodegen (5)(3),Sort(8),WholeStageCodegen (2)(7),Exchange(9),Sort(15),WholeStageCodegen (4)(14),Exchange(16)|
+  |1       |0    |1    |4      |0             |126           |HashAggregate(1),WholeStageCodegen (6)(0),Exchange(2)                                                                                                              |
+  +--------+-----+-----+-------+--------------+--------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+  ```
 
 - Compare Rapids related Spark properties side-by-side:
 
-```
-Compare Rapids Properties which are set explicitly:
-+-------------------------------------------+----------+----------+
-|propertyName                               |appIndex_1|appIndex_2|
-+-------------------------------------------+----------+----------+
-|spark.rapids.memory.pinnedPool.size        |null      |2g        |
-|spark.rapids.sql.castFloatToDecimal.enabled|null      |true      |
-|spark.rapids.sql.concurrentGpuTasks        |null      |2         |
-|spark.rapids.sql.enabled                   |false     |true      |
-|spark.rapids.sql.explain                   |null      |NOT_ON_GPU|
-|spark.rapids.sql.incompatibleOps.enabled   |null      |true      |
-+-------------------------------------------+----------+----------+
-```
+  ```
+  Compare Rapids Properties which are set explicitly:
+  +-------------------------------------------+----------+----------+
+  |propertyName                               |appIndex_1|appIndex_2|
+  +-------------------------------------------+----------+----------+
+  |spark.rapids.memory.pinnedPool.size        |null      |2g        |
+  |spark.rapids.sql.castFloatToDecimal.enabled|null      |true      |
+  |spark.rapids.sql.concurrentGpuTasks        |null      |2         |
+  |spark.rapids.sql.enabled                   |false     |true      |
+  |spark.rapids.sql.explain                   |null      |NOT_ON_GPU|
+  |spark.rapids.sql.incompatibleOps.enabled   |null      |true      |
+  +-------------------------------------------+----------+----------+
+  ```
  
 - List rapids-4-spark jars based on classpath:
 
-```
-Rapids Accelerator jar:
-+--------+------------------------------------------------------------+
-|appIndex|Rapids4Spark jars                                           |
-+--------+------------------------------------------------------------+
-|1       |spark://10.10.10.10:43445/jars/rapids-4-spark_2.12-0.5.0.jar|
-|2       |spark://10.10.10.11:41319/jars/rapids-4-spark_2.12-0.5.0.jar|
-+--------+------------------------------------------------------------+
-```
+  ```
+  Rapids Accelerator jar:
+  +--------+------------------------------------------------------------+
+  |appIndex|Rapids4Spark jars                                           |
+  +--------+------------------------------------------------------------+
+  |1       |spark://10.10.10.10:43445/jars/rapids-4-spark_2.12-0.5.0.jar|
+  |2       |spark://10.10.10.11:41319/jars/rapids-4-spark_2.12-0.5.0.jar|
+  +--------+------------------------------------------------------------+
+  ```
 
 - Job, stage and SQL ID information(not in `compare` mode yet):
 
-```
-Job Information:
-+--------+-----+---------+-----+-------------+-------------+
-|appIndex|jobID|stageIds |sqlID|startTime    |endTime      |
-+--------+-----+---------+-----+-------------+-------------+
-|1       |0    |[0]      |null |1622846402778|1622846410240|
-|1       |1    |[1,2,3,4]|0    |1622846431114|1622846441591|
-+--------+-----+---------+-----+-------------+-------------+
-```
+  ```
+  Job Information:
+  +--------+-----+---------+-----+-------------+-------------+
+  |appIndex|jobID|stageIds |sqlID|startTime    |endTime      |
+  +--------+-----+---------+-----+-------------+-------------+
+  |1       |0    |[0]      |null |1622846402778|1622846410240|
+  |1       |1    |[1,2,3,4]|0    |1622846431114|1622846441591|
+  +--------+-----+---------+-----+-------------+-------------+
+  ```
 
 - SQL Plan Metrics for Application for each SQL plan node in each SQL:
 
-These are also called accumulables in Spark.
-Note that not all SQL nodes have a mapping to stage id.
+  These are also called accumulables in Spark.  
+  Note that not all SQL nodes have a mapping to stage id.
 
-```
-SQL Plan Metrics for Application:
-+--------+-----+------+-----------------------------------------------------------+-------------+-----------------------+-------------+----------+--------+
-|appIndex|sqlID|nodeID|nodeName                                                   |accumulatorId|name                   |max_value    |metricType|stageIds|
-+--------+-----+------+-----------------------------------------------------------+-------------+-----------------------+-------------+----------+--------+
-|1       |0    |1     |GpuColumnarExchange                                        |111          |output rows            |1111111111   |sum       |4,3     |
-|1       |0    |1     |GpuColumnarExchange                                        |112          |output columnar batches|222222       |sum       |4,3     |
-|1       |0    |1     |GpuColumnarExchange                                        |113          |data size              |333333333333 |size      |4,3     |
-|1       |0    |1     |GpuColumnarExchange                                        |114          |shuffle bytes written  |444444444444 |size      |4,3     | 
-|1       |0    |1     |GpuColumnarExchange                                        |115          |shuffle records written|555555       |sum       |4,3     |
-|1       |0    |1     |GpuColumnarExchange                                        |116          |shuffle write time     |666666666666 |nsTiming  |4,3     |
-```
+  ```
+  SQL Plan Metrics for Application:
+  +--------+-----+------+--------------------+-------------+-----------------------+-------------+----------+--------+
+  |appIndex|sqlID|nodeID|nodeName            |accumulatorId|name                   |max_value    |metricType|stageIds|
+  +--------+-----+------+--------------------+-------------+-----------------------+-------------+----------+--------+
+  |1       |0    |1     |GpuColumnarExchange |111          |output rows            |1111111111   |sum       |4,3     |
+  |1       |0    |1     |GpuColumnarExchange |112          |output columnar batches|222222       |sum       |4,3     |
+  |1       |0    |1     |GpuColumnarExchange |113          |data size              |333333333333 |size      |4,3     |
+  |1       |0    |1     |GpuColumnarExchange |114          |shuffle bytes written  |444444444444 |size      |4,3     | 
+  |1       |0    |1     |GpuColumnarExchange |115          |shuffle records written|555555       |sum       |4,3     |
+  |1       |0    |1     |GpuColumnarExchange |116          |shuffle write time     |666666666666 |nsTiming  |4,3     |
+  ```
 
 - WholeStageCodeGen to Node Mapping (only for CPU logs):
 
-```
-WholeStageCodeGen Mapping:
-+--------+-----+------+---------------------+-------------------+------------+
-|appIndex|sqlID|nodeID|SQL Node             |Child Node         |Child NodeID|
-+--------+-----+------+---------------------+-------------------+------------+
-|1       |0    |0     |WholeStageCodegen (6)|HashAggregate      |1           |
-|1       |0    |3     |WholeStageCodegen (5)|HashAggregate      |4           |
-|1       |0    |3     |WholeStageCodegen (5)|Project            |5           |
-|1       |0    |3     |WholeStageCodegen (5)|SortMergeJoin      |6           |
-|1       |0    |7     |WholeStageCodegen (2)|Sort               |8           |
-```
+  ```
+  WholeStageCodeGen Mapping:
+  +--------+-----+------+---------------------+-------------------+------------+
+  |appIndex|sqlID|nodeID|SQL Node             |Child Node         |Child NodeID|
+  +--------+-----+------+---------------------+-------------------+------------+
+  |1       |0    |0     |WholeStageCodegen (6)|HashAggregate      |1           |
+  |1       |0    |3     |WholeStageCodegen (5)|HashAggregate      |4           |
+  |1       |0    |3     |WholeStageCodegen (5)|Project            |5           |
+  |1       |0    |3     |WholeStageCodegen (5)|SortMergeJoin      |6           |
+  |1       |0    |7     |WholeStageCodegen (2)|Sort               |8           |
+  ```
 
 
 #### B. Analysis
+
 - Job + Stage level aggregated task metrics
 - SQL level aggregated task metrics
 - SQL duration, application duration, if it contains Dataset or RDD operation, potential problems, executor CPU time percent
 - Shuffle Skew Check: (When task's Shuffle Read Size > 3 * Avg Stage-level size)
 
-Below we will aggregate the task level metrics at different levels 
-to do some analysis such as detecting possible shuffle skew.
+  Below we will aggregate the task level metrics at different levels 
+  to do some analysis such as detecting possible shuffle skew.
 
 - Job + Stage level aggregated task metrics:
 
-```
-Job + Stage level aggregated task metrics:
-+--------+-------+--------+--------+--------------------+------------+------------+------------+------------+-------------------+------------------------------+---------------------------+-------------------+-------------------+---------------------+-------------+----------------------+-----------------------+-------------------------+-----------------------+---------------------------+--------------+--------------------+-------------------------+---------------------+--------------------------+----------------------+----------------------------+---------------------+-------------------+---------------------+----------------+
-|appIndex|ID     |numTasks|Duration|diskBytesSpilled_sum|duration_sum|duration_max|duration_min|duration_avg|executorCPUTime_sum|executorDeserializeCPUTime_sum|executorDeserializeTime_sum|executorRunTime_sum|input_bytesRead_sum|input_recordsRead_sum|jvmGCTime_sum|memoryBytesSpilled_sum|output_bytesWritten_sum|output_recordsWritten_sum|peakExecutionMemory_max|resultSerializationTime_sum|resultSize_max|sr_fetchWaitTime_sum|sr_localBlocksFetched_sum|sr_localBytesRead_sum|sr_remoteBlocksFetched_sum|sr_remoteBytesRead_sum|sr_remoteBytesReadToDisk_sum|sr_totalBytesRead_sum|sw_bytesWritten_sum|sw_recordsWritten_sum|sw_writeTime_sum|
-+--------+-------+--------+--------+--------------------+------------+------------+------------+------------+-------------------+------------------------------+---------------------------+-------------------+-------------------+---------------------+-------------+----------------------+-----------------------+-------------------------+-----------------------+---------------------------+--------------+--------------------+-------------------------+---------------------+--------------------------+----------------------+----------------------------+---------------------+-------------------+---------------------+----------------+
-|1       |job_0  |3333    |222222  |0                   |11111111    |111111      |111         |1111.1      |6666666            |55555                         |55555                      |55555555           |222222222222       |22222222222          |111111       |0                     |0                      |0                        |222222222              |1                          |11111         |11111               |99999                    |22222222222          |2222221                   |222222222222          |0                           |222222222222         |222222222222       |5555555              |444444          |
-```
+  ```
+  +--------+-------+--------+--------+--------------------+------------+------------+------------+------------+-------------------+------------------------------+---------------------------+-------------------+-------------------+---------------------+-------------+----------------------+-----------------------+-------------------------+-----------------------+---------------------------+--------------+--------------------+-------------------------+---------------------+--------------------------+----------------------+----------------------------+---------------------+-------------------+---------------------+----------------+
+  |appIndex|ID     |numTasks|Duration|diskBytesSpilled_sum|duration_sum|duration_max|duration_min|duration_avg|executorCPUTime_sum|executorDeserializeCPUTime_sum|executorDeserializeTime_sum|executorRunTime_sum|input_bytesRead_sum|input_recordsRead_sum|jvmGCTime_sum|memoryBytesSpilled_sum|output_bytesWritten_sum|output_recordsWritten_sum|peakExecutionMemory_max|resultSerializationTime_sum|resultSize_max|sr_fetchWaitTime_sum|sr_localBlocksFetched_sum|sr_localBytesRead_sum|sr_remoteBlocksFetched_sum|sr_remoteBytesRead_sum|sr_remoteBytesReadToDisk_sum|sr_totalBytesRead_sum|sw_bytesWritten_sum|sw_recordsWritten_sum|sw_writeTime_sum|
+  +--------+-------+--------+--------+--------------------+------------+------------+------------+------------+-------------------+------------------------------+---------------------------+-------------------+-------------------+---------------------+-------------+----------------------+-----------------------+-------------------------+-----------------------+---------------------------+--------------+--------------------+-------------------------+---------------------+--------------------------+----------------------+----------------------------+---------------------+-------------------+---------------------+----------------+
+  |1       |job_0  |3333    |222222  |0                   |11111111    |111111      |111         |1111.1      |6666666            |55555                         |55555                      |55555555           |222222222222       |22222222222          |111111       |0                     |0                      |0                        |222222222              |1                          |11111         |11111               |99999                    |22222222222          |2222221                   |222222222222          |0                           |222222222222         |222222222222       |5555555              |444444          |
+  ```
 
 - SQL level aggregated task metrics:
 
-```
-SQL level aggregated task metrics:
-+--------+------------------------------+-----+--------------------+--------+--------+---------------+---------------+----------------+--------------------+------------+------------+------------+------------+-------------------+------------------------------+---------------------------+-------------------+-------------------+---------------------+-------------+----------------------+-----------------------+-------------------------+-----------------------+---------------------------+--------------+--------------------+-------------------------+---------------------+--------------------------+----------------------+----------------------------+---------------------+-------------------+---------------------+----------------+
-|appIndex|appID                         |sqlID|description         |numTasks|Duration|executorCPUTime|executorRunTime|executorCPURatio|diskBytesSpilled_sum|duration_sum|duration_max|duration_min|duration_avg|executorCPUTime_sum|executorDeserializeCPUTime_sum|executorDeserializeTime_sum|executorRunTime_sum|input_bytesRead_sum|input_recordsRead_sum|jvmGCTime_sum|memoryBytesSpilled_sum|output_bytesWritten_sum|output_recordsWritten_sum|peakExecutionMemory_max|resultSerializationTime_sum|resultSize_max|sr_fetchWaitTime_sum|sr_localBlocksFetched_sum|sr_localBytesRead_sum|sr_remoteBlocksFetched_sum|sr_remoteBytesRead_sum|sr_remoteBytesReadToDisk_sum|sr_totalBytesRead_sum|sw_bytesWritten_sum|sw_recordsWritten_sum|sw_writeTime_sum|
-+--------+------------------------------+-----+--------------------+--------+--------+---------------+---------------+----------------+--------------------+------------+------------+------------+------------+-------------------+------------------------------+---------------------------+-------------------+-------------------+---------------------+-------------+----------------------+-----------------------+-------------------------+-----------------------+---------------------------+--------------+--------------------+-------------------------+---------------------+--------------------------+----------------------+----------------------------+---------------------+-------------------+---------------------+----------------+
-|1       |application_1111111111111_0001|0    |show at <console>:11|1111    |222222  |6666666        |55555555       |55.55           |0                   |13333333    |111111      |999         |3333.3      |6666666            |55555                         |66666                      |11111111           |111111111111       |11111111111          |111111       |0                     |0                      |0                        |888888888              |8                          |11111         |11111               |99999                    |11111111111          |2222222                   |222222222222          |0                           |222222222222         |444444444444       |5555555              |444444          |
-```
+  ```
+  SQL level aggregated task metrics:
+  +--------+------------------------------+-----+--------------------+--------+--------+---------------+---------------+----------------+--------------------+------------+------------+------------+------------+-------------------+------------------------------+---------------------------+-------------------+-------------------+---------------------+-------------+----------------------+-----------------------+-------------------------+-----------------------+---------------------------+--------------+--------------------+-------------------------+---------------------+--------------------------+----------------------+----------------------------+---------------------+-------------------+---------------------+----------------+
+  |appIndex|appID                         |sqlID|description         |numTasks|Duration|executorCPUTime|executorRunTime|executorCPURatio|diskBytesSpilled_sum|duration_sum|duration_max|duration_min|duration_avg|executorCPUTime_sum|executorDeserializeCPUTime_sum|executorDeserializeTime_sum|executorRunTime_sum|input_bytesRead_sum|input_recordsRead_sum|jvmGCTime_sum|memoryBytesSpilled_sum|output_bytesWritten_sum|output_recordsWritten_sum|peakExecutionMemory_max|resultSerializationTime_sum|resultSize_max|sr_fetchWaitTime_sum|sr_localBlocksFetched_sum|sr_localBytesRead_sum|sr_remoteBlocksFetched_sum|sr_remoteBytesRead_sum|sr_remoteBytesReadToDisk_sum|sr_totalBytesRead_sum|sw_bytesWritten_sum|sw_recordsWritten_sum|sw_writeTime_sum|
+  +--------+------------------------------+-----+--------------------+--------+--------+---------------+---------------+----------------+--------------------+------------+------------+------------+------------+-------------------+------------------------------+---------------------------+-------------------+-------------------+---------------------+-------------+----------------------+-----------------------+-------------------------+-----------------------+---------------------------+--------------+--------------------+-------------------------+---------------------+--------------------------+----------------------+----------------------------+---------------------+-------------------+---------------------+----------------+
+  |1       |application_1111111111111_0001|0    |show at <console>:11|1111    |222222  |6666666        |55555555       |55.55           |0                   |13333333    |111111      |999         |3333.3      |6666666            |55555                         |66666                      |11111111           |111111111111       |11111111111          |111111       |0                     |0                      |0                        |888888888              |8                          |11111         |11111               |99999                    |11111111111          |2222222                   |222222222222          |0                           |222222222222         |444444444444       |5555555              |444444          |
+  ```
 
 - SQL duration, application during, if it contains Dataset or RDD operation, potential problems, executor CPU time percent:
 
-```
-SQL Duration and Executor CPU Time Percent
-+--------+-------------------+-----+------------+--------------------------+------------+---------------------------+-------------------------+
-|appIndex|App ID             |sqlID|SQL Duration|Contains Dataset or RDD Op|App Duration|Potential Problems         |Executor CPU Time Percent|
-+--------+-------------------+-----+------------+--------------------------+------------+---------------------------+-------------------------+
-|1       |local-1626104300434|0    |1260        |false                     |131104      |NESTED COMPLEX TYPE        |92.65                    |
-|1       |local-1626104300434|1    |259         |false                     |131104      |NESTED COMPLEX TYPE        |76.79                    |
-```
+  ```
+  SQL Duration and Executor CPU Time Percent
+  +--------+-------------------+-----+------------+--------------------------+------------+---------------------------+-------------------------+
+  |appIndex|App ID             |sqlID|SQL Duration|Contains Dataset or RDD Op|App Duration|Potential Problems         |Executor CPU Time Percent|
+  +--------+-------------------+-----+------------+--------------------------+------------+---------------------------+-------------------------+
+  |1       |local-1626104300434|0    |1260        |false                     |131104      |NESTED COMPLEX TYPE        |92.65                    |
+  |1       |local-1626104300434|1    |259         |false                     |131104      |NESTED COMPLEX TYPE        |76.79                    |
+  ```
 
 - Shuffle Skew Check: 
 
-```
-Shuffle Skew Check: (When task's Shuffle Read Size > 3 * Avg Stage-level size)
-+--------+-------+--------------+------+-------+---------------+--------------+-----------------+----------------+----------------+----------+----------------------------------------------------------------------------------------------------+
-|appIndex|stageId|stageAttemptId|taskId|attempt|taskDurationSec|avgDurationSec|taskShuffleReadMB|avgShuffleReadMB|taskPeakMemoryMB|successful|reason                                                                                              |
-+--------+-------+--------------+------+-------+---------------+--------------+-----------------+----------------+----------------+----------+----------------------------------------------------------------------------------------------------+
-|1       |2      |0             |2222  |0      |111.11         |7.7           |2222.22          |111.11          |0.01            |false     |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /dddd/xxxxxxx/ccccc/bbbbbbbbb/aaaaaaa|
-|1       |2      |0             |2224  |1      |222.22         |8.8           |3333.33          |111.11          |0.01            |false     |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /dddd/xxxxxxx/ccccc/bbbbbbbbb/aaaaaaa|
-+--------+-------+--------------+------+-------+---------------+--------------+-----------------+----------------+----------------+----------+----------------------------------------------------------------------------------------------------+
-```
+  ```
+  Shuffle Skew Check: (When task's Shuffle Read Size > 3 * Avg Stage-level size)
+  +--------+-------+--------------+------+-------+---------------+--------------+-----------------+----------------+----------------+----------+----------------------------------------------------------------------------------------------------+
+  |appIndex|stageId|stageAttemptId|taskId|attempt|taskDurationSec|avgDurationSec|taskShuffleReadMB|avgShuffleReadMB|taskPeakMemoryMB|successful|reason                                                                                              |
+  +--------+-------+--------------+------+-------+---------------+--------------+-----------------+----------------+----------------+----------+----------------------------------------------------------------------------------------------------+
+  |1       |2      |0             |2222  |0      |111.11         |7.7           |2222.22          |111.11          |0.01            |false     |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /dddd/xxxxxxx/ccccc/bbbbbbbbb/aaaaaaa|
+  |1       |2      |0             |2224  |1      |222.22         |8.8           |3333.33          |111.11          |0.01            |false     |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /dddd/xxxxxxx/ccccc/bbbbbbbbb/aaaaaaa|
+  +--------+-------+--------------+------+-------+---------------+--------------+-----------------+----------------+----------------+----------+----------------------------------------------------------------------------------------------------+
+  ```
 
 #### C. Health Check
+
 - List failed tasks, stages and jobs
 - Removed BlockManagers and Executors
 - SQL Plan HealthCheck
 
 Below are examples.
+
 - Print failed tasks:
 
-```
-Failed tasks:
-+--------+-------+--------------+------+-------+----------------------------------------------------------------------------------------------------+
-|appIndex|stageId|stageAttemptId|taskId|attempt|failureReason                                                                              |
-+--------+-------+--------------+------+-------+----------------------------------------------------------------------------------------------------+
-|3       |4      |0             |2842  |0      |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /home/jenkins/agent/workspace/jenkins|
-|3       |4      |0             |2858  |0      |TaskKilled(another attempt succeeded,List(AccumulableInfo(453,None,Some(22000),None,false,true,None)|
-|3       |4      |0             |2884  |0      |TaskKilled(another attempt succeeded,List(AccumulableInfo(453,None,Some(21148),None,false,true,None)|
-|3       |4      |0             |2908  |0      |TaskKilled(another attempt succeeded,List(AccumulableInfo(453,None,Some(20420),None,false,true,None)|
-|3       |4      |0             |3410  |1      |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /home/jenkins/agent/workspace/jenkins|
-|4       |1      |0             |1948  |1      |TaskKilled(another attempt succeeded,List(AccumulableInfo(290,None,Some(1107),None,false,true,None),|
-+--------+-------+--------------+------+-------+----------------------------------------------------------------------------------------------------+
-```
+  ```
+  Failed tasks:
+  +--------+-------+--------------+------+-------+----------------------------------------------------------------------------------------------------+
+  |appIndex|stageId|stageAttemptId|taskId|attempt|failureReason                                                                              |
+  +--------+-------+--------------+------+-------+----------------------------------------------------------------------------------------------------+
+  |3       |4      |0             |2842  |0      |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /home/jenkins/agent/workspace/jenkins|
+  |3       |4      |0             |2858  |0      |TaskKilled(another attempt succeeded,List(AccumulableInfo(453,None,Some(22000),None,false,true,None)|
+  |3       |4      |0             |2884  |0      |TaskKilled(another attempt succeeded,List(AccumulableInfo(453,None,Some(21148),None,false,true,None)|
+  |3       |4      |0             |2908  |0      |TaskKilled(another attempt succeeded,List(AccumulableInfo(453,None,Some(20420),None,false,true,None)|
+  |3       |4      |0             |3410  |1      |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /home/jenkins/agent/workspace/jenkins|
+  |4       |1      |0             |1948  |1      |TaskKilled(another attempt succeeded,List(AccumulableInfo(290,None,Some(1107),None,false,true,None),|
+  +--------+-------+--------------+------+-------+----------------------------------------------------------------------------------------------------+
+  ```
 
 - Print failed stages:
 
-```
-Failed stages:
-+--------+-------+---------+-------------------------------------+--------+---------------------------------------------------+
-|appIndex|stageId|attemptId|name                                 |numTasks|failureReason                                      |
-+--------+-------+---------+-------------------------------------+--------+---------------------------------------------------+
-|3       |4      |0        |attachTree at Spark300Shims.scala:624|1000    |Job 0 cancelled as part of cancellation of all jobs|
-+--------+-------+---------+-------------------------------------+--------+---------------------------------------------------+
-```
+  ```
+  Failed stages:
+  +--------+-------+---------+-------------------------------------+--------+---------------------------------------------------+
+  |appIndex|stageId|attemptId|name                                 |numTasks|failureReason                                      |
+  +--------+-------+---------+-------------------------------------+--------+---------------------------------------------------+
+  |3       |4      |0        |attachTree at Spark300Shims.scala:624|1000    |Job 0 cancelled as part of cancellation of all jobs|
+  +--------+-------+---------+-------------------------------------+--------+---------------------------------------------------+
+  ```
 
 - Print failed jobs:
 
-```
-Failed jobs:
-+--------+-----+---------+------------------------------------------------------------------------+
-|appIndex|jobID|jobResult|failureReason                                                           |
-+--------+-----+---------+------------------------------------------------------------------------+
-|3       |0    |JobFailed|java.lang.Exception: Job 0 cancelled as part of cancellation of all j...|
-+--------+-----+---------+------------------------------------------------------------------------+
-```
+  ```
+  Failed jobs:
+  +--------+-----+---------+------------------------------------------------------------------------+
+  |appIndex|jobID|jobResult|failureReason                                                           |
+  +--------+-----+---------+------------------------------------------------------------------------+
+  |3       |0    |JobFailed|java.lang.Exception: Job 0 cancelled as part of cancellation of all j...|
+  +--------+-----+---------+------------------------------------------------------------------------+
+  ```
 
 - SQL Plan HealthCheck:
 
   Prints possibly unsupported query plan nodes; for example, the `$Lambda` keyword indicates use of the Dataset API.
 
-```
-+--------+-----+------+--------+---------------------------------------------------------------------------------------------------+
-|appIndex|sqlID|nodeID|nodeName|nodeDescription                                                                                    |
-+--------+-----+------+--------+---------------------------------------------------------------------------------------------------+
-|3       |1    |8     |Filter  |Filter $line21.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$Lambda$4578/0x00000008019f1840@4b63e04c.apply|
-+--------+-----+------+--------+---------------------------------------------------------------------------------------------------+
-```
+  ```
+  +--------+-----+------+--------+---------------------------------------------------------------------------------------------------+
+  |appIndex|sqlID|nodeID|nodeName|nodeDescription                                                                                    |
+  +--------+-----+------+--------+---------------------------------------------------------------------------------------------------+
+  |3       |1    |8     |Filter  |Filter $line21.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$Lambda$4578/0x00000008019f1840@4b63e04c.apply|
+  +--------+-----+------+--------+---------------------------------------------------------------------------------------------------+
+  ```
 
 #### D. Recommended Configuration
 
 The _Auto-Tuner_ output has 2 main sections:
+
 1. _Spark Properties_: A list of Apache Spark configurations to tune the performance of the app.
    The list is the result of `diff` between the existing app configurations and the recommended
    ones. Therefore, if a recommendation matches the existing app configuration, it will not show up in
@@ -449,6 +455,7 @@ The _Auto-Tuner_ output has 2 main sections:
 **Examples**
 
 - A successful run with missing _softwareProperties_:
+
    ```
    Spark Properties:
    --conf spark.executor.cores=16
@@ -474,8 +481,9 @@ The _Auto-Tuner_ output has 2 main sections:
    - 'spark.sql.adaptive.enabled' should be enabled for better performance.
    ```
 
-- A succesful run with defined _softwareProperties_. In this example, only
-  two recommendations did not match the existing app app configurations:
+- A successful run with defined _softwareProperties_. In this example, only
+  two recommendations did not match the existing app configurations:
+
   ```
   Spark Properties:
   --conf spark.executor.instances=8
@@ -486,6 +494,7 @@ The _Auto-Tuner_ output has 2 main sections:
   ```
 
 - Failing to load the worker info:
+
   ```
   Cannot recommend properties. See Comments.
 
@@ -501,83 +510,84 @@ The _Auto-Tuner_ output has 2 main sections:
 
 ### Generating Visualizations
 
-- Print SQL Plans (--print-plans option):
-Prints the SQL plan as a text string to a file named `planDescriptions.log`.
+- Print SQL Plans (`--print-plans` option):
+  Prints the SQL plan as a text string to a file named `planDescriptions.log`.
 
-- Generate DOT graph for each SQL (--generate-dot option):
+- Generate DOT graph for each SQL (`--generate-dot` option):
 
-```
-Generated DOT graphs for app app-20210507103057-0000 to /path/. in 17 second(s)
-```
+  ```
+  Generated DOT graphs for app app-20210507103057-0000 to /path/. in 17 second(s)
+  ```
 
-A dot file will be generated for each query in the application.
-Once the DOT file is generated, you can install [graphviz](http://www.graphviz.org) to convert the DOT file
-as a graph in pdf format using below command:
+  A DOT file will be generated for each query in the application.
+  Once the DOT file is generated, you can install [graphviz](http://www.graphviz.org) to convert it
+  into a PDF graph using the command below:
 
-```bash
-dot -Tpdf ./app-20210507103057-0000-query-0/0.dot > app-20210507103057-0000.pdf
-```
+  ```bash
+  dot -Tpdf ./app-20210507103057-0000-query-0/0.dot > app-20210507103057-0000.pdf
+  ```
 
-Or to svg using
-```bash
-dot -Tsvg ./app-20210507103057-0000-query-0/0.dot > app-20210507103057-0000.svg
-```
+  Or convert it to SVG using:
 
-The pdf or svg file has the SQL plan graph with metrics. The svg file will act a little
-more like the Spark UI and include extra information for nodes when hovering over it with
-a mouse.
-
-As a part of this an effort is made to associate parts of the graph with the Spark stage it is a
-part of. This is not 100% accurate. Some parts of the plan like `TakeOrderedAndProject` may
-be a part of multiple stages and only one of the stages will be selected. `Exchanges` are purposely
-left out of the sections associated with a stage because they cover at least 2 stages and possibly
-more. In other cases we may not be able to determine what stage something was a part of. In those
-cases we mark it as `UNKNOWN STAGE`. This is because we rely on metrics to link a node to a stage.
-If a stage has no metrics, like if the query crashed early, we cannot establish that link.
-
-- Generate timeline for application (--generate-timeline option):
-
-The output of this is an [svg](https://en.wikipedia.org/wiki/Scalable_Vector_Graphics) file
-named `timeline.svg`.  Most web browsers can display this file.  It is a
-timeline view similar to Apache Spark's
-[event timeline](https://spark.apache.org/docs/latest/web-ui.html).
-
-This displays several data sections.
-
-1. **Tasks** This shows all tasks in the application divided by executor. Please note that this
-   tries to pack the tasks in the graph. It does not represent actual scheduling on CPU cores.
-   The tasks are labeled with the time it took for them to run. There is a breakdown of some metrics
-   per task in the lower half of the task block with different colors used to designate different
-   metrics.
-   1. Yellow is the deserialization time for the task as reported by Spark. This works for both CPU
-   and GPU tasks.
-   2. White is the read time for a task. This is a combination of the "buffer time" GPU SQL metric
-   and the shuffle read time as reported by Spark. The shuffle time works for both CPU and GPU
-   tasks, but "buffer time" only is reported for GPU accelerated file reads.
-   3. Red is the semaphore wait time. This is the amount of time a task spent waiting to get access
-   to the GPU. When processing logs generated by versions of the spark rapids plugin prior to
-   23.04 this would only show up on GPU tasks when DEBUG metrics are enabled. For logs generated
-   with 23.04 and above it is always on. It does not apply to CPU tasks, as they don't go through
-   the Semaphore.
-   4. Green is the "op time" SQL metric along with a few other metrics that also indicate the amount
-   of time the GPU was being used to process data. This is GPU specific.
-   5. Blue is the write time for a task. This is the "write time" SQL metric used when writing out
-   results as files using GPU acceleration, or it is the shuffle write time as reported by Spark.
-   The shuffle metrics work for both CPU and GPU tasks, but the "write time" metrics is GPU specific.
-   6. Anything else is time that is not accounted for by these metrics. Typically, this is time
-   spent on the CPU, but could also include semaphore wait time as DEBUG metrics are not on by
-   default.
-2. **STAGES** This shows the stages times reported by Spark. It starts with when the stage was
-   scheduled and ends when Spark considered the stage done.
-3. **STAGE RANGES** This shows the time from the start of the first task to the end of the last
-   task. Often a stage is scheduled, but there are not enough resources in the cluster to run it.
-   This helps to show. How long it takes for a task to start running after it is scheduled, and in
-   many cases how long it took to run all of the tasks in the stage. This is not always true because
-   Spark can intermix tasks from different stages.
-4. **JOBS** This shows the time range reported by Spark from when a job was scheduled to when it
-   completed.
-5. **SQL** This shows the time range reported by Spark from when a SQL statement was scheduled to
-   when it completed.
+  ```bash
+  dot -Tsvg ./app-20210507103057-0000-query-0/0.dot > app-20210507103057-0000.svg
+  ```
+
+  The PDF or SVG file contains the SQL plan graph with metrics. The SVG file behaves a little
+  more like the Spark UI and shows extra information for a node when you hover over it with
+  the mouse.
+
+  As part of this, an effort is made to associate each part of the graph with the Spark stage it
+  belongs to. This is not 100% accurate. Some parts of the plan, like `TakeOrderedAndProject`, may
+  be part of multiple stages, and only one of those stages will be selected. `Exchanges` are purposely
+  left out of the sections associated with a stage because they cover at least 2 stages and possibly
+  more. In other cases we may not be able to determine what stage something was a part of. In those
+  cases we mark it as `UNKNOWN STAGE`. This is because we rely on metrics to link a node to a stage;
+  if a stage has no metrics, for example because the query crashed early, we cannot establish that link.
+
+- Generate timeline for application (`--generate-timeline` option):
+
+  The output of this is an [svg](https://en.wikipedia.org/wiki/Scalable_Vector_Graphics) file
+  named `timeline.svg`.  Most web browsers can display this file.  It is a
+  timeline view similar to Apache Spark's
+  [event timeline](https://spark.apache.org/docs/latest/web-ui.html).
+
+  This displays several data sections.
+
+  1. **Tasks** This shows all tasks in the application divided by executor. Please note that this
+     tries to pack the tasks in the graph. It does not represent actual scheduling on CPU cores.
+     The tasks are labeled with the time it took for them to run. There is a breakdown of some metrics
+     per task in the lower half of the task block with different colors used to designate different
+     metrics.
+     1. Yellow is the deserialization time for the task as reported by Spark. This works for both CPU
+        and GPU tasks.
+     2. White is the read time for a task. This is a combination of the "buffer time" GPU SQL metric
+        and the shuffle read time as reported by Spark. The shuffle time works for both CPU and GPU
+        tasks, but "buffer time" is only reported for GPU accelerated file reads.
+     3. Red is the semaphore wait time. This is the amount of time a task spent waiting to get access
+        to the GPU. When processing logs generated by versions of the spark-rapids plugin prior to
+        23.04 this would only show up on GPU tasks when DEBUG metrics are enabled. For logs generated
+        with 23.04 and above it is always on. It does not apply to CPU tasks, as they don't go through
+        the Semaphore.
+     4. Green is the "op time" SQL metric along with a few other metrics that also indicate the amount
+        of time the GPU was being used to process data. This is GPU specific.
+     5. Blue is the write time for a task. This is the "write time" SQL metric used when writing out
+        results as files using GPU acceleration, or it is the shuffle write time as reported by Spark.
+        The shuffle metrics work for both CPU and GPU tasks, but the "write time" metric is GPU specific.
+     6. Anything else is time that is not accounted for by these metrics. Typically, this is time
+        spent on the CPU, but could also include semaphore wait time as DEBUG metrics are not on by
+        default.
+  2. **STAGES** This shows the stage times reported by Spark. It starts when the stage was
+     scheduled and ends when Spark considered the stage done.
+  3. **STAGE RANGES** This shows the time from the start of the first task to the end of the last
+     task. Often a stage is scheduled, but there are not enough resources in the cluster to run it.
+     This helps to show how long it takes for a task to start running after it is scheduled, and in
+     many cases how long it took to run all the tasks in the stage. This is not always true because
+     Spark can intermix tasks from different stages.
+  4. **JOBS** This shows the time range reported by Spark from when a job was scheduled to when it
+     completed.
+  5. **SQL** This shows the time range reported by Spark from when a SQL statement was scheduled to
+     when it completed.
 
 Tasks and stages are all color coordinated to help identify which tasks are associated with a given
 stage. Jobs and SQL are not color coordinated.
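+
+As a quick illustration, both visualization options can be passed in a single run of the Profiling
+tool. This is a minimal sketch; the jar name and paths below are placeholders:
+
+```bash
+java -cp rapids-4-spark-tools_<version>.jar:$SPARK_HOME/jars/* \
+    com.nvidia.spark.rapids.tool.profiling.ProfileMain \
+    --generate-dot --generate-timeline /path/to/eventlog
+```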
@@ -670,6 +680,7 @@ Please refer to [Understanding the Profiling tool output](#d-recommended-configu
 more details on the output of the _Auto-Tuner_.
 
 Note the following _Auto-Tuner_ limitations:
+
 - It is currently only supported in the _Collection Mode_ (see [the 3 different modes](#step-2-how-to-run-the-profiling-tool)), and
 - It is assumed that all the _worker_ nodes on the cluster are homogenous.
 
@@ -701,14 +712,14 @@ A template of the worker information is shown below:
   ```
 
 
-| Property           | Optional |                                                          If Missing                                                          |
-|--------------------|:--------:|:----------------------------------------------------------------------------------------------------------------------------:|
+| Property           | Optional | If Missing                                                                                                                   |
+|--------------------|:--------:|------------------------------------------------------------------------------------------------------------------------------|
 | system.numCores    |    No    | _Auto-Tuner_ does not calculate recommendations                                                                              |
 | system.memory      |    No    | _Auto-Tuner_ does not calculate any recommendations                                                                          |
-| system.numWorkers  |    Yes   | Default: 1                                                                                                                   |
-| gpu.name           |    Yes   | Default: T4 (Nvidia Tesla T4)                                                                                                |
-| gpu.memory         |    Yes   | Default: 16G                                                                                                                 |
-| softwareProperties |    Yes   | This section is optional. The _Auto-Tuner_ reads the configs within the logs of the Apache Spark apps with higher precedence |
+| system.numWorkers  |   Yes    | Default: 1                                                                                                                   |
+| gpu.name           |   Yes    | Default: T4 (Nvidia Tesla T4)                                                                                                |
+| gpu.memory         |   Yes    | Default: 16G                                                                                                                 |
+| softwareProperties |   Yes    | This section is optional. The _Auto-Tuner_ reads the configs within the logs of the Apache Spark apps with higher precedence |
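+
+As a minimal sketch based on the defaults listed above (all values are illustrative), a worker
+info file that supplies only the required properties could be as small as:
+
+```
+system:
+  numCores: 16
+  memory: 65536MiB
+```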
 
 ## Profiling tool metrics definitions
 

From 1aed70fdd5f8831bbd6ba8173bbf4ba8b4d8f432 Mon Sep 17 00:00:00 2001
From: Niranjan Artal <50492963+nartal1@users.noreply.github.com>
Date: Wed, 12 Jul 2023 16:14:05 -0700
Subject: [PATCH 11/14] Support onprem platform for user-tools profiling
 command (#431)

* Support onprem platform for user-tools profiling command

---------

Signed-off-by: Niranjan Artal <nartal@nvidia.com>
---
 user_tools/docs/user-tools-onprem.md          |  93 +++++++++++++++-
 .../wrappers/onprem_wrapper.py                | 102 ++++++++++++++++--
 2 files changed, 186 insertions(+), 9 deletions(-)

diff --git a/user_tools/docs/user-tools-onprem.md b/user_tools/docs/user-tools-onprem.md
index aa09e88a4..e50120985 100644
--- a/user_tools/docs/user-tools-onprem.md
+++ b/user_tools/docs/user-tools-onprem.md
@@ -219,4 +219,95 @@ The command creates a directory with UUID that contains the following:
     └── qualification_summary.csv
     3 directories, 9 files
     ```
-  
\ No newline at end of file
+
+## Profiling command
+
+### Local deployment
+
+```
+spark_rapids_user_tools onprem profiling [options]
+spark_rapids_user_tools onprem profiling -- --help
+```
+
+The local deployment runs on the local development machine. It requires:
+1. Java 1.8+ development environment.
+2. Internet access to download JAR dependencies from mvn: `spark-*.jar`.
+3. Dependencies are cached on the local disk to reduce the overhead of the download.
+
+#### Command options
+
+| Option               | Description                                                                                                                                                                                                                                               | Default                                                                                                                                       | Required |
+|----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------|----------|
+| **worker_info**      | A path pointing to a yaml file containing the system information of a worker node. It is assumed that all workers are homogenous. The format of the file is described in the following section.                                                           | None                                                                                                                                          | Y        |
+| **eventlogs**        | A comma separated list of event logs or directories                                                                                                                                                                                                        | None                                                                                                                                          | N        |
+| **local_folder**     | Local work-directory path to store the output and to be used as root directory for temporary folders/files. The final output will go into a subdirectory named `prof-${EXEC_ID}` where `exec_id` is an auto-generated unique identifier of the execution. | If the argument is NONE, the default value is the env variable `RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY` if any; or the current working directory. | N        |
+| **jvm_heap_size**    | The maximum heap size of the JVM in gigabytes                                                                                                                                                                                                             | 24                                                                                                                                            | N        |
+| **tools_jar**        | Path to a bundled jar including RAPIDS tool. The path is a local filesystem.                                                                                                                                                                              | Downloads the latest `rapids-4-spark-tools_*.jar` from mvn repo                                                                               | N        |
+| **verbose**          | True or False to enable verbosity to the wrapper script                                                                                                                                                                                                   | False if `RAPIDS_USER_TOOLS_LOG_DEBUG` is not set                                                                                             | N        |
+| **rapids_options**   | A list of valid [Profiling tool options](../../core/docs/spark-profiling-tool.md#profiling-tool-options). Note that (`output-directory`, `auto-tuner`, `combined`) flags are ignored                                                                      | N/A                                                                                                                                           | N        |
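+
+As a hedged illustration of how these options combine (all paths and values below are
+placeholders), a typical invocation could look like:
+
+```
+spark_rapids_user_tools onprem profiling \
+    --worker_info ./worker-info.yaml \
+    --eventlogs /path/to/eventlogs \
+    --local_folder ./prof-output \
+    --jvm_heap_size 32 \
+    --verbose
+```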
+
+If the `worker_info` argument is not provided, the tool throws an error and exits.
+The `worker_info` is a yaml file that contains the HW description of the workers. It must contain
+the following properties:
+- `system.numCores`: number of cores of a single worker node
+- `system.memory`: RAM size in MiB of a single node
+- `system.numWorkers`: number of workers
+- `gpu.name`: the accelerator installed on the worker node
+- `gpu.memory`: memory size of the accelerator in MiB (e.g., 16 GB for an NVIDIA T4)
+- `softwareProperties`: Spark default-configurations of the target cluster
+
+An example of a valid `worker_info.yaml`:
+
+  ```
+  system:
+    numCores: 32
+    memory: 212992MiB
+    numWorkers: 5
+  gpu:
+    memory: 15109MiB
+    count: 4
+    name: T4
+  softwareProperties:
+    spark.driver.maxResultSize: 7680m
+    spark.driver.memory: 15360m
+    spark.executor.cores: '8'
+    spark.executor.instances: '2'
+    spark.executor.memory: 47222m
+    spark.executorEnv.OPENBLAS_NUM_THREADS: '1'
+    spark.scheduler.mode: FAIR
+    spark.sql.cbo.enabled: 'true'
+    spark.ui.port: '0'
+    spark.yarn.am.memory: 640m
+  ```
+
+#### Use case scenario
+
+A typical workflow to successfully run the `profiling` command in local mode is described as follows:
+
+1. Store the Apache Spark event logs in a local folder.
+2. On a machine with JDK8+ installed:
+    1. The user installs `spark_rapids_user_tools` (see the sketch after this list).
+3. The user defines the cluster configuration of the on-premises platform in a yaml file
+   (`worker_info.yaml`) following the format described above.
+4. The user runs the profiling tool CLI command.
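+
+A minimal sketch of step 2 above, assuming the package is published on PyPI under the name
+`spark-rapids-user-tools`:
+
+```
+pip install spark-rapids-user-tools
+```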
+
+For each successful execution, the wrapper generates a new directory in the format of
+`prof_<YYYYmmddHHmmss>_<0x%08X>`. The directory contains `profiling_summary.log` in addition to
+the actual folder of the RAPIDS Profiling tool.
+
+   ```
+    ./prof_<YYYYmmddHHmmss>_<0x%08X>/profiling_summary.log
+    ./prof_<YYYYmmddHHmmss>_<0x%08X>/rapids_4_spark_profile/
+   ```
+
+Users can provide a simple yaml file to describe the shape of the worker nodes.
+The CLI is triggered by providing the location where the yaml file is stored: `--worker_info $WORKER_INFO_PATH`.
+
+```
+# First, create a yaml file as described in the previous section
+$> export WORKER_INFO_PATH=worker-info.yaml
+# Run the profiling cmd
+$> spark_rapids_user_tools onprem profiling \
+        --eventlogs $EVENTLOGS \
+        --worker_info $WORKER_INFO_PATH
+```
diff --git a/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py b/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
index 046c7c568..32463ec3e 100644
--- a/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
+++ b/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
@@ -17,6 +17,7 @@
 
 from spark_rapids_pytools.cloud_api.sp_types import DeployMode, CloudPlatform
 from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
 from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType
 
 
@@ -30,10 +31,7 @@ class CliOnpremLocalMode:  # pylint: disable=too-few-public-methods
     def qualification(cpu_cluster: str = None,
                       eventlogs: str = None,
                       local_folder: str = None,
-                      remote_folder: str = None,
-                      gpu_cluster: str = None,
                       tools_jar: str = None,
-                      credentials_file: str = None,
                       filter_apps: str = QualFilterApp.tostring(QualFilterApp.SPEEDUPS),
                       target_platform: str = None,
                       gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
@@ -41,6 +39,38 @@ def qualification(cpu_cluster: str = None,
                       jvm_heap_size: int = 24,
                       verbose: bool = False,
                       **rapids_options) -> None:
+        """
+        The Qualification tool analyzes Spark events generated from CPU based Spark applications to
+        help quantify the expected acceleration and cost savings of migrating a Spark application
+        or query to GPU. The wrapper downloads dependencies and executes the analysis on the local
+        dev machine.
+        :param cpu_cluster: The on-premises cluster on which the Apache Spark applications were executed.
+                Accepted value is valid path to the cluster properties file (json format).
+        :param eventlogs: A comma separated list of urls pointing to event logs in local directory.
+        :param local_folder: Local work-directory path to store the output and to be used as root
+                directory for temporary folders/files. The final output will go into a subdirectory
+                named `qual-${EXEC_ID}` where `exec_id` is an auto-generated unique identifier of the execution.
+        :param tools_jar: Path to a bundled jar including RAPIDS tool. The path is a local filesystem path
+        :param filter_apps:  Filtering criteria of the applications listed in the final STDOUT table is one of
+                the following (`NONE`, `SPEEDUPS`). "`NONE`" means no filter applied. "`SPEEDUPS`" lists all the
+                apps that are either '_Recommended_', or '_Strongly Recommended_' based on speedups.
+        :param target_platform: Cost savings and speedup recommendation for comparable cluster in target_platform
+                based on on-premises cluster configuration. Currently only `dataproc` is supported for
+                target_platform. If not provided, the final report will be limited to GPU speedups only
+                without cost-savings.
+        :param gpu_cluster_recommendation: The type of GPU cluster recommendation to generate.
+               It accepts one of the following ("CLUSTER", "JOB" and the default value "MATCH").
+                "MATCH": keep GPU cluster same number of nodes as CPU cluster;
+                "CLUSTER": recommend optimal GPU cluster by cost for entire cluster;
+                "JOB": recommend optimal GPU cluster by cost per job
+        :param jvm_heap_size: The maximum heap size of the JVM in gigabytes
+        :param verbose: True or False to enable verbosity to the wrapper script
+        :param rapids_options: A list of valid Qualification tool options.
+                Note that the wrapper ignores ["output-directory", "platform"] flags, and it does not support
+                multiple "spark-property" arguments.
+                For more details on Qualification tool options, please visit
+                https://nvidia.github.io/spark-rapids/docs/spark-qualification-tool.html#qualification-tool-options
+        """
         if verbose:
             # when debug is set to true set it in the environment.
             ToolLogging.enable_debug_mode()
@@ -58,16 +88,13 @@ def qualification(cpu_cluster: str = None,
 
         wrapper_qual_options = {
             'platformOpts': {
-                'credentialFile': credentials_file,
                 'deployMode': DeployMode.LOCAL,
                 'targetPlatform': target_platform
             },
             'migrationClustersProps': {
-                'cpuCluster': cpu_cluster,
-                'gpuCluster': gpu_cluster
+                'cpuCluster': cpu_cluster
             },
             'jobSubmissionProps': {
-                'remoteFolder': remote_folder,
                 'platformArgs': {
                     'jvmMaxHeapSize': jvm_heap_size
                 }
@@ -88,10 +115,69 @@ def qualification(cpu_cluster: str = None,
     def is_target_platform_supported(target_platform: str):
         return target_platform == 'dataproc'
 
+    @staticmethod
+    def profiling(worker_info: str = None,
+                  eventlogs: str = None,
+                  local_folder: str = None,
+                  tools_jar: str = None,
+                  jvm_heap_size: int = 24,
+                  verbose: bool = False,
+                  **rapids_options) -> None:
+        """
+        The Profiling tool analyzes both CPU and GPU generated event logs and generates information
+        which can be used for debugging and profiling Apache Spark applications.
+
+        :param worker_info: A path pointing to a yaml file containing the system information of a
+        worker node. It is assumed that all workers are homogenous.
+        If missing, the tool throws an error.
+        :param eventlogs: Event log filenames or directories containing event logs (comma separated).
+        :param local_folder: Local work-directory path to store the output and to be used as root
+        directory for temporary folders/files. The final output will go into a subdirectory called
+        ${local_folder}/prof-${EXEC_ID} where exec_id is an auto-generated unique identifier of the
+        execution. If the argument is NONE, the default value is the env variable
+        RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY if any; or the current working directory.
+        :param tools_jar: Path to a bundled jar including Rapids tool. The path is a local filesystem.
+        If missing, the wrapper downloads the latest rapids-4-spark-tools_*.jar from maven repo
+        :param verbose: True or False to enable verbosity to the wrapper script
+        :param jvm_heap_size: The maximum heap size of the JVM in gigabytes
+        :param rapids_options: A list of valid Profiling tool options.
+        Note that the wrapper ignores ["output-directory", "worker-info"] flags, and it does not support
+        multiple "spark-property" arguments.
+        For more details on Profiling tool options, please visit
+        https://nvidia.github.io/spark-rapids/docs/spark-profiling-tool.html#profiling-tool-options
+        """
+
+        if worker_info is None:
+            raise RuntimeError('worker_info.yaml file containing the system information of '
+                               'a worker node is required to run profiling tool on OnPrem '
+                               'cluster')
+        if verbose:
+            # when debug is set to true set it in the environment.
+            ToolLogging.enable_debug_mode()
+        wrapper_prof_options = {
+            'platformOpts': {
+                'deployMode': DeployMode.LOCAL,
+                'targetPlatform': CloudPlatform.ONPREM
+            },
+            'jobSubmissionProps': {
+                'platformArgs': {
+                     'jvmMaxHeapSize': jvm_heap_size
+                }
+            },
+            'eventlogs': eventlogs,
+            'toolsJar': tools_jar,
+            'autoTunerFileInput': worker_info
+        }
+        ProfilingAsLocal(platform_type=CloudPlatform.ONPREM,
+                         output_folder=local_folder,
+                         wrapper_options=wrapper_prof_options,
+                         rapids_options=rapids_options).launch()
+
 
 class OnPremWrapper:  # pylint: disable=too-few-public-methods
     """
-    A wrapper script to run RAPIDS Accelerator tools (Qualification) on On-prem cluster.
+    A wrapper script to run RAPIDS Accelerator tools (Qualification, Profiling) on On-prem cluster.
     """
     def __init__(self):
         self.qualification = CliOnpremLocalMode.qualification
+        self.profiling = CliOnpremLocalMode.profiling

From df223e18c1238c753abee92ef6d01e3ab2752489 Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Thu, 13 Jul 2023 14:43:17 -0500
Subject: [PATCH 12/14] Add ascli as a new simplified tools command (#426)

* Add ascli as a new simplified qualification command

Fixes #425

- adds a new `ascli qualification` cmd
- platform default value is onPrem
- the platform is not yet detected from the eventlogs path

* Add ascli as a new simplified profiling command

- adds a new `ascli profiling` cmd
- platform default value is onPrem
- the platform is not yet detected from the eventlogs path
- todo: detect whether cluster is a CSP properties or worker's info
  custom file

* Update fire-dependency to >= 0.5.0
* Profiling command should run without cluster-info

Fixes #436

Allow the user to run profiling without worker_info; in that case the
auto-tuner is not executed and only the profiling output is generated.
This implies that users can use the python wrapper, deprecating the java
cmd.
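
A hypothetical invocation of the new simplified commands (the event log path
is a placeholder):

    ascli qualification --eventlogs /path/to/eventlogs
    ascli profiling --eventlogs /path/to/eventlogs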

---------

Signed-off-by: Ahmed Hussein (amahussein) <a@ahussein.me>
---
 user_tools/pyproject.toml                     |   2 +-
 user_tools/setup.cfg                          |   1 +
 .../ascli_cli/__init__.py                     |  15 ++
 .../spark_rapids_pytools/ascli_cli/ascli.py   | 174 ++++++++++++++++++
 .../cloud_api/sp_types.py                     |   5 +-
 .../spark_rapids_pytools/rapids/profiling.py  |  62 ++++---
 .../rapids/qualification.py                   |   3 +-
 .../rapids/rapids_tool.py                     |   2 +-
 .../resources/profiling-conf.yaml             |   1 +
 .../info_recommendations_disabled.ms          |   7 +
 .../wrappers/onprem_wrapper.py                |   4 -
 11 files changed, 247 insertions(+), 29 deletions(-)
 create mode 100644 user_tools/src/spark_rapids_pytools/ascli_cli/__init__.py
 create mode 100644 user_tools/src/spark_rapids_pytools/ascli_cli/ascli.py
 create mode 100644 user_tools/src/spark_rapids_pytools/resources/templates/info_recommendations_disabled.ms

diff --git a/user_tools/pyproject.toml b/user_tools/pyproject.toml
index fd8742757..390e99049 100644
--- a/user_tools/pyproject.toml
+++ b/user_tools/pyproject.toml
@@ -19,7 +19,7 @@ dependencies = [
     "chevron==0.14.0",
     "fastprogress==1.0.3",
     "fastcore==1.5.29",
-    "fire==0.4.0",
+    "fire>=0.5.0",
     "pandas==1.4.3",
     "pyYAML==6.0",
     "tabulate==0.8.10",
diff --git a/user_tools/setup.cfg b/user_tools/setup.cfg
index c2bf5a086..3c69638ba 100644
--- a/user_tools/setup.cfg
+++ b/user_tools/setup.cfg
@@ -1,5 +1,6 @@
 [options.entry_points]
 console_scripts =
     spark_rapids_user_tools = spark_rapids_pytools.wrapper:main
+    ascli = spark_rapids_pytools.ascli_cli.ascli:main
 [options.package_data]
 * = *.json, *.yaml, *.ms, *.sh
diff --git a/user_tools/src/spark_rapids_pytools/ascli_cli/__init__.py b/user_tools/src/spark_rapids_pytools/ascli_cli/__init__.py
new file mode 100644
index 000000000..2ff0b4aca
--- /dev/null
+++ b/user_tools/src/spark_rapids_pytools/ascli_cli/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""init file of the ascli package."""
diff --git a/user_tools/src/spark_rapids_pytools/ascli_cli/ascli.py b/user_tools/src/spark_rapids_pytools/ascli_cli/ascli.py
new file mode 100644
index 000000000..2e46d386e
--- /dev/null
+++ b/user_tools/src/spark_rapids_pytools/ascli_cli/ascli.py
@@ -0,0 +1,174 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""CLI to run tools associated with RAPIDS Accelerator for Apache Spark plugin."""
+
+import fire
+
+from spark_rapids_pytools.cloud_api.sp_types import CloudPlatform, DeployMode
+from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
+from spark_rapids_pytools.rapids.qualification import QualGpuClusterReshapeType, QualFilterApp, QualificationAsLocal
+from spark_rapids_pytools.wrappers.onprem_wrapper import CliOnpremLocalMode
+
+
+class ASCLIWrapper(object):  # pylint: disable=too-few-public-methods
+    """CLI to run tools associated with RAPIDS Accelerator for Apache Spark plugin.
+
+    A wrapper script to run RAPIDS Accelerator tools (Qualification, Profiling, and Bootstrap)
+    locally on the dev machine.
+    """
+
+    def qualification(self,
+                      eventlogs: str = None,
+                      cluster: str = None,
+                      platform: str = CloudPlatform.tostring(CloudPlatform.get_default()),
+                      output_folder: str = None,
+                      target_platform: str = None,
+                      filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
+                      gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
+                          QualGpuClusterReshapeType.get_default())):
+        """Provides a wrapper to simplify the execution of RAPIDS Qualification tool.
+
+        The Qualification tool analyzes Spark events generated from CPU based Spark applications to
+        help quantify the expected acceleration and cost savings of migrating a Spark application
+        or query to GPU. The wrapper downloads dependencies and executes the analysis on the local
+        dev machine.
+
+        :param eventlogs: Event log filenames or CSP storage directories containing event logs
+                (comma separated).
+
+                Skipping this argument requires that the cluster argument points to a valid
+                cluster name on the CSP.
+        :param cluster: Name of cluster or path to cluster-properties. Note that using a "file path"
+                requires the `platform` argument.
+        :param platform: defines one of the following: "onprem", "emr", "dataproc", "databricks-aws",
+                and "databricks-azure".
+        :param output_folder: path to store the output
+        :param target_platform: Cost savings and speedup recommendation for comparable cluster in
+                target_platform based on on-premises cluster configuration.
+                Requires cluster.
+        :param gpu_cluster_recommendation: The type of GPU cluster recommendation to generate.
+                Requires "Cluster".
+                It accepts one of the following:
+
+                "MATCH": keep GPU cluster same number of nodes as CPU cluster;
+                "CLUSTER": recommend optimal GPU cluster by cost for entire cluster;
+                "JOB": recommend optimal GPU cluster by cost per job
+        :param filter_apps:  filtering criteria of the applications listed in the final STDOUT table
+                is one of the following (NONE, SPEEDUPS, SAVINGS).
+                Requires "Cluster".
+
+                Note that this filter does not affect the CSV report.
+                "NONE" means no filter applied. "SPEEDUPS" lists all the apps that are either
+                'Recommended', or 'Strongly Recommended' based on speedups. "SAVINGS"
+                lists all the apps that have positive estimated GPU savings except for the apps that
+                are "Not Applicable"
+        """
+        runtime_platform = CloudPlatform.fromstring(platform)
+        if runtime_platform == CloudPlatform.ONPREM:
+            # if target_platform is specified, check if it's a valid supported platform and filter the
+            # apps based on savings
+            if target_platform is not None:
+                if CliOnpremLocalMode.is_target_platform_supported(target_platform):
+                    if cluster is None:
+                        raise RuntimeError('OnPrem\'s cluster property file required to calculate '
+                                           'savings for ' + target_platform + ' platform')
+                else:
+                    raise RuntimeError(f'The platform [{target_platform}] is currently not supported '
+                                       'to calculate savings from OnPrem cluster')
+            else:
+                # For onPRem runtime, the filter should be reset to speedups when no target_platform
+                # is defined
+                filter_apps = QualFilterApp.tostring(QualFilterApp.SPEEDUPS)
+
+        wrapper_qual_options = {
+            'platformOpts': {
+                'credentialFile': None,
+                'deployMode': DeployMode.LOCAL,
+                'targetPlatform': target_platform
+            },
+            'migrationClustersProps': {
+                'cpuCluster': cluster,
+                'gpuCluster': None
+            },
+            'jobSubmissionProps': {
+                'remoteFolder': None,
+                'platformArgs': {
+                    'jvmMaxHeapSize': 24
+                }
+            },
+            'eventlogs': eventlogs,
+            'filterApps': filter_apps,
+            'toolsJar': None,
+            'gpuClusterRecommendation': gpu_cluster_recommendation,
+            'target_platform': target_platform
+        }
+        tool_obj = QualificationAsLocal(platform_type=runtime_platform,
+                                        output_folder=output_folder,
+                                        wrapper_options=wrapper_qual_options)
+        return tool_obj.launch()
+
+    def profiling(self,
+                  eventlogs: str = None,
+                  cluster: str = None,
+                  platform: str = CloudPlatform.tostring(CloudPlatform.get_default()),
+                  output_folder: str = None):
+        """Provides a wrapper to simplify the execution of RAPIDS Profiling tool.
+
+        The Profiling tool analyzes GPU event logs and generates information
+        which can be used for debugging and profiling Apache Spark applications.
+
+        :param eventlogs: Event log filenames or CSP storage directories containing event logs
+                (comma separated).
+
+                Skipping this argument requires that the cluster argument points to a valid
+                cluster name on the CSP.
+        :param cluster: The cluster on which the Apache Spark applications were executed.
+                It can either be a CSP-cluster name or a path to the cluster/worker's info properties
+                file (json format).
+        :param platform: defines one of the following: "onprem", "emr", "dataproc", "databricks-aws",
+                and "databricks-azure".
+        :param output_folder: path to store the output
+        """
+
+        wrapper_prof_options = {
+            'platformOpts': {
+                'credentialFile': None,
+                'deployMode': DeployMode.LOCAL,
+            },
+            'migrationClustersProps': {
+                'gpuCluster': cluster
+            },
+            'jobSubmissionProps': {
+                'remoteFolder': None,
+                'platformArgs': {
+                    'jvmMaxHeapSize': 24
+                }
+            },
+            'eventlogs': eventlogs,
+            'toolsJar': None,
+            'autoTunerFileInput': None
+        }
+        prof_tool_obj = ProfilingAsLocal(platform_type=CloudPlatform.fromstring(platform),
+                                         output_folder=output_folder,
+                                         wrapper_options=wrapper_prof_options)
+        return prof_tool_obj.launch()
+
+
+def main():
+    fire.Fire(ASCLIWrapper())
+
+
+if __name__ == '__main__':
+    main()
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/sp_types.py b/user_tools/src/spark_rapids_pytools/cloud_api/sp_types.py
index 07e8c0246..e378b3c1c 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/sp_types.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/sp_types.py
@@ -123,9 +123,12 @@ class CloudPlatform(EnumeratedType):
     DATAPROC = 'dataproc'
     EMR = 'emr'
     ONPREM = 'onprem'
-    LOCAL = 'local'
     NONE = 'NONE'
 
+    @classmethod
+    def get_default(cls):
+        return cls.ONPREM
+
 
 class TargetPlatform(EnumeratedType):
     """Determine CostSavings for target platform based on OnPrem cluster configuration"""
diff --git a/user_tools/src/spark_rapids_pytools/rapids/profiling.py b/user_tools/src/spark_rapids_pytools/rapids/profiling.py
index aebc13ea5..97e75a33c 100644
--- a/user_tools/src/spark_rapids_pytools/rapids/profiling.py
+++ b/user_tools/src/spark_rapids_pytools/rapids/profiling.py
@@ -25,7 +25,7 @@
 
 from spark_rapids_pytools.cloud_api.sp_types import ClusterBase
 from spark_rapids_pytools.common.sys_storage import FSUtil
-from spark_rapids_pytools.common.utilities import Utils
+from spark_rapids_pytools.common.utilities import Utils, TemplateGenerator
 from spark_rapids_pytools.rapids.rapids_tool import RapidsJarTool
 
 
@@ -66,24 +66,27 @@ def _process_custom_args(self):
 
     def _process_offline_cluster_args(self):
         offline_cluster_opts = self.wrapper_options.get('migrationClustersProps', {})
-        self._process_gpu_cluster_args(offline_cluster_opts)
-        self._generate_autotuner_input()
+        if self._process_gpu_cluster_args(offline_cluster_opts):
+            # only if we succeed to get the GPU cluster, we can generate auto-tuner-input
+            self._generate_autotuner_input()
+
+    def __load_disabled_recommendation_report(self) -> str:
+        template_file_name = self.ctxt.get_value('toolOutput', 'recommendations', 'disabledInfoMsgTemplate')
+        template_path = Utils.resource_path(f'templates/{template_file_name}')
+        return TemplateGenerator.render_template_file(template_path, {'CLUSTER_ARG': 'cluster'})
 
     def _process_gpu_cluster_args(self, offline_cluster_opts: dict = None):
         gpu_cluster_arg = offline_cluster_opts.get('gpuCluster')
         if gpu_cluster_arg:
             gpu_cluster_obj = self._create_migration_cluster('GPU', gpu_cluster_arg)
             self.ctxt.set_ctxt('gpuClusterProxy', gpu_cluster_obj)
-        else:
-            # If we are here, we know that the workerInfoPath was not set as well.
-            # Then we should fail
-            self.logger.error('The gpuCluster argument was not set. '
-                              'Please make sure to set the arguments properly by either:\n'
-                              '  1. Setting <gpu_cluster> argument and optional set <eventlogs> if '
-                              'the path is not defined by the cluster properties ; or\n'
-                              '  2. Setting both <worker_info> and <eventlogs>')
-            raise RuntimeError('Invalid Arguments: The <gpu_cluster> and <worker_info> arguments are '
-                               'not defined. Aborting Execution.')
+            return True
+        # If we reach this point, the workerInfoPath was not set either.
+        # Remind the user that recommendations won't be calculated.
+        disabled_recommendations_msg = self.__load_disabled_recommendation_report()
+        self.ctxt.set_ctxt('disabledRecommendationsMsg', disabled_recommendations_msg)
+        self.logger.info(disabled_recommendations_msg)
+        return False
 
     def _generate_autotuner_file_for_cluster(self, file_path: str, cluster_ob: ClusterBase):
         """
@@ -132,11 +135,11 @@ def _generate_autotuner_input(self):
         self.ctxt.set_ctxt('autoTunerFilePath', autotuner_input_path)
 
     def _create_autotuner_rapids_args(self) -> list:
-        # add the autotuner argument
-        autotuner_args = ['--auto-tuner',
-                          '--worker-info',
-                          self.ctxt.get_ctxt('autoTunerFilePath')]
-        return autotuner_args
+        # Add the autotuner argument if the autotunerPath exists
+        autotuner_path = self.ctxt.get_ctxt('autoTunerFilePath')
+        if autotuner_path is None:
+            return []
+        return ['--auto-tuner', '--worker-info', autotuner_path]
 
     def __read_single_app_output(self, file_path: str) -> (str, List[str], List[str]):
         def split_list_str_by_pattern(input_seq: List[str], pattern: str) -> int:
@@ -206,9 +209,15 @@ def _write_summary(self):
         print(Utils.gen_multiline_str(self._report_tool_full_location(),
                                       self.ctxt.get_ctxt('wrapperOutputContent')))
 
-    def _process_output(self):
-        if not self._evaluate_rapids_jar_tool_output_exist():
-            return
+    def __generate_report_no_recommendations(self):
+        prof_app_dirs = FSUtil.get_subdirectories(self.ctxt.get_rapids_output_folder())
+        wrapper_content = [Utils.gen_report_sec_header('Recommendations'),
+                           self.ctxt.get_ctxt('disabledRecommendationsMsg'),
+                           Utils.gen_report_sec_header('Profiling status'),
+                           f'Total applications profiled: {len(prof_app_dirs)}']
+        self.ctxt.set_ctxt('wrapperOutputContent', wrapper_content)
+
+    def __generate_report_with_recommendations(self):
         prof_app_dirs = FSUtil.get_subdirectories(self.ctxt.get_rapids_output_folder())
         profiling_log = self.ctxt.get_value('toolOutput', 'recommendations', 'fileName')
         recommendations_table = []
@@ -248,6 +257,17 @@ def _process_output(self):
                            tabulate(recommendations_table, headers, tablefmt='grid')]
         self.ctxt.set_ctxt('wrapperOutputContent', wrapper_content)
 
+    def _process_output(self):
+        if not self._evaluate_rapids_jar_tool_output_exist():
+            return
+
+        if self.ctxt.get_ctxt('autoTunerFilePath'):
+            # if autotuner is enabled, generate full recommendations summary
+            self.__generate_report_with_recommendations()
+        else:
+            # generate a brief summary
+            self.__generate_report_no_recommendations()
+
     def _init_rapids_arg_list(self) -> List[str]:
         return self._create_autotuner_rapids_args()
 
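
Taken together, the profiling.py changes make `_process_output` branch on whether an auto-tuner input file was generated. A condensed, hedged sketch of that flow follows; the names below are simplified stand-ins for `__generate_report_with_recommendations` and `__generate_report_no_recommendations`.

```python
# Simplified sketch of the new reporting branch: the full recommendations section is
# built only when the auto-tuner input exists, otherwise a brief status summary is emitted.
from typing import List, Optional


def build_wrapper_output(autotuner_file_path: Optional[str],
                         prof_app_dirs: List[str]) -> List[str]:
    if autotuner_file_path:
        # stand-in for __generate_report_with_recommendations()
        return ['### Recommendations ###',
                f'(recommendations table for {len(prof_app_dirs)} applications goes here)']
    # stand-in for __generate_report_no_recommendations()
    return ['### Recommendations ###',
            'Recommendations cannot be generated: the cluster information was not set.',
            '### Profiling status ###',
            f'Total applications profiled: {len(prof_app_dirs)}']


print('\n'.join(build_wrapper_output(None, ['app-1', 'app-2'])))
```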
diff --git a/user_tools/src/spark_rapids_pytools/rapids/qualification.py b/user_tools/src/spark_rapids_pytools/rapids/qualification.py
index cccb14d8e..405d60a49 100644
--- a/user_tools/src/spark_rapids_pytools/rapids/qualification.py
+++ b/user_tools/src/spark_rapids_pytools/rapids/qualification.py
@@ -207,7 +207,7 @@ def _process_cpu_cluster_args(self, offline_cluster_opts: dict = None):
             cpu_cluster_obj = self._create_migration_cluster('CPU', cpu_cluster_arg)
             self.ctxt.set_ctxt('cpuClusterProxy', cpu_cluster_obj)
 
-    def _process_gpu_cluster_args(self, offline_cluster_opts: dict = None):
+    def _process_gpu_cluster_args(self, offline_cluster_opts: dict = None) -> bool:
         gpu_cluster_arg = offline_cluster_opts.get('gpuCluster')
         if gpu_cluster_arg is None:
             self.logger.info('Creating GPU cluster by converting the CPU cluster instances to GPU supported types')
@@ -217,6 +217,7 @@ def _process_gpu_cluster_args(self, offline_cluster_opts: dict = None):
         else:
             gpu_cluster_obj = self._create_migration_cluster('GPU', gpu_cluster_arg)
         self.ctxt.set_ctxt('gpuClusterProxy', gpu_cluster_obj)
+        return True
 
     def _process_offline_cluster_args(self):
         offline_cluster_opts = self.wrapper_options.get('migrationClustersProps', {})
diff --git a/user_tools/src/spark_rapids_pytools/rapids/rapids_tool.py b/user_tools/src/spark_rapids_pytools/rapids/rapids_tool.py
index 1a5f19660..f466131a0 100644
--- a/user_tools/src/spark_rapids_pytools/rapids/rapids_tool.py
+++ b/user_tools/src/spark_rapids_pytools/rapids/rapids_tool.py
@@ -507,7 +507,7 @@ def _process_rapids_args(self):
     def _process_offline_cluster_args(self):
         pass
 
-    def _process_gpu_cluster_args(self, offline_cluster_opts: dict = None):
+    def _process_gpu_cluster_args(self, offline_cluster_opts: dict = None) -> bool:
         pass
 
     def _copy_dependencies_to_remote(self):
diff --git a/user_tools/src/spark_rapids_pytools/resources/profiling-conf.yaml b/user_tools/src/spark_rapids_pytools/resources/profiling-conf.yaml
index 3376b1b60..443e0dc65 100644
--- a/user_tools/src/spark_rapids_pytools/resources/profiling-conf.yaml
+++ b/user_tools/src/spark_rapids_pytools/resources/profiling-conf.yaml
@@ -2,6 +2,7 @@ toolOutput:
   subFolder: rapids_4_spark_profile
   recommendations:
     fileName: profile.log
+    disabledInfoMsgTemplate: 'info_recommendations_disabled.ms'
     headers:
       section: '### D. Recommended Configuration ###'
       sparkProperties: 'Spark Properties:'
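
For reference, a small sketch of how a nested lookup over this YAML resolves the new key; PyYAML and the `get_value` helper below are stand-ins for the tool's own `ctxt.get_value(...)` accessor.

```python
# Sketch: resolve the new disabledInfoMsgTemplate key from the nested YAML config.
import yaml

conf = yaml.safe_load("""
toolOutput:
  subFolder: rapids_4_spark_profile
  recommendations:
    fileName: profile.log
    disabledInfoMsgTemplate: 'info_recommendations_disabled.ms'
""")


def get_value(root: dict, *keys):
    # walk the nested dictionaries, mirroring ctxt.get_value('toolOutput', ...)
    for key in keys:
        root = root[key]
    return root


print(get_value(conf, 'toolOutput', 'recommendations', 'disabledInfoMsgTemplate'))
```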
diff --git a/user_tools/src/spark_rapids_pytools/resources/templates/info_recommendations_disabled.ms b/user_tools/src/spark_rapids_pytools/resources/templates/info_recommendations_disabled.ms
new file mode 100644
index 000000000..92ddcbc92
--- /dev/null
+++ b/user_tools/src/spark_rapids_pytools/resources/templates/info_recommendations_disabled.ms
@@ -0,0 +1,7 @@
+Recommendations cannot be generated.
+    Reason: The cluster information was not set.
+    To enable recommendations, the cluster properties must be defined.
+    Please make sure to set the arguments properly by either:
+        1. Setting the <{{ CLUSTER_ARG }}> argument and optionally setting <eventlogs> if
+            the path is not defined by the cluster properties; or
+        2. Setting both <{{ CLUSTER_ARG }}> and <eventlogs>
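
The `{{ CLUSTER_ARG }}` placeholders are filled in by `TemplateGenerator.render_template_file(template_path, {'CLUSTER_ARG': 'cluster'})` as shown in the profiling.py hunk above. The snippet below is a rough stand-in that previews the rendered message with plain string substitution; the template text is an abridged copy.

```python
# Rough stand-in for the mustache-style rendering done by TemplateGenerator;
# plain string replacement is enough to preview the message for a given CLUSTER_ARG.
template = (
    'Recommendations cannot be generated.\n'
    '    Reason: The cluster information was not set.\n'
    '    Please make sure to set the arguments properly by either:\n'
    '        1. Setting the <{{ CLUSTER_ARG }}> argument and optionally setting <eventlogs>; or\n'
    '        2. Setting both <{{ CLUSTER_ARG }}> and <eventlogs>\n'
)
print(template.replace('{{ CLUSTER_ARG }}', 'cluster'))
```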
diff --git a/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py b/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
index 32463ec3e..f79a141ba 100644
--- a/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
+++ b/user_tools/src/spark_rapids_pytools/wrappers/onprem_wrapper.py
@@ -147,10 +147,6 @@ def profiling(worker_info: str = None,
         https://nvidia.github.io/spark-rapids/docs/spark-profiling-tool.html#profiling-tool-options
         """
 
-        if worker_info is None:
-            raise RuntimeError('worker_info.yaml file containing the system information of '
-                               'a worker node is required to run profiling tool on OnPrem '
-                               'cluster')
         if verbose:
             # when debug is set to true set it in the environment.
             ToolLogging.enable_debug_mode()

From 237a22a6225c3d7464e8b59ef30ec6ea31e97142 Mon Sep 17 00:00:00 2001
From: Cindy Jiang <47068112+cindyyuanjiang@users.noreply.github.com>
Date: Thu, 13 Jul 2023 15:39:15 -0700
Subject: [PATCH 13/14] update more details for databricks azure user
 qualification tool doc (#438)

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>
---
 .../docs/user-tools-databricks-azure.md       | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/user_tools/docs/user-tools-databricks-azure.md b/user_tools/docs/user-tools-databricks-azure.md
index 6d6df86a0..9313509ff 100644
--- a/user_tools/docs/user-tools-databricks-azure.md
+++ b/user_tools/docs/user-tools-databricks-azure.md
@@ -12,8 +12,8 @@ The tool currently only supports event logs stored on ABFS ([Azure Blob File Sys
 
 - Install the Databricks CLI. Follow the instructions on [Install the CLI](https://docs.databricks.com/dev-tools/cli/index.html#install-the-cli).
 - Set the configuration settings and credentials of the Databricks CLI:
-  - Set up authentication using a Databricks personal access token by following these [instructions](https://docs.databricks.com/dev-tools/cli/index.html#set-up-authentication-using-a-databricks-personal-access-token)
-  - Test the authentication setup by following these [instructions](https://docs.databricks.com/dev-tools/cli/index.html#test-your-authentication-setup)
+  - Set up authentication using a Databricks personal access token by following these [instructions](https://docs.databricks.com/dev-tools/cli/index.html#set-up-authentication-using-a-databricks-personal-access-token).
+  - Test the authentication setup by following these [instructions](https://docs.databricks.com/dev-tools/cli/index.html#test-your-authentication-setup).
   - Verify that the access credentials are stored in the file `~/.databrickscfg` on Unix, Linux, or macOS, or in another file defined by environment variable `DATABRICKS_CONFIG_FILE`.
 
 ### 2.Azure CLI
@@ -22,12 +22,13 @@ The tool currently only supports event logs stored on ABFS ([Azure Blob File Sys
 - Set the configuration settings and credentials of the Azure CLI:
   - Set up the authentication by following these [instructions](https://learn.microsoft.com/en-us/cli/azure/authenticate-azure-cli).
   - Configure the Azure CLI by following these [instructions](https://learn.microsoft.com/en-us/cli/azure/azure-cli-configuration).
-    - Location is used for retreving instance type description (default is `westus`).
+    - `location` is used for retrieving the instance type description (default is `westus`).
+    - Verify that the configurations are stored in the file `$AZURE_CONFIG_DIR/config` where the default value of `AZURE_CONFIG_DIR` is `$HOME/.azure` on Linux or macOS.
 
 ### 3.RAPIDS tools
 
 - Spark event logs:
-  - The RAPIDS tools can process Apache Spark CPU event logs from Spark 2.0 or higher (raw, .lz4, .lzf, .snappy, .zstd)
+  - The RAPIDS tools can process Apache Spark CPU event logs from Spark 2.0 or higher (raw, .lz4, .lzf, .snappy, .zstd).
   - For `qualification` commands, the event logs need to be archived to an accessible local or ABFS folder.
 
 ### 4.Install the package
@@ -36,7 +37,7 @@ The tool currently only supports event logs stored on ABFS ([Azure Blob File Sys
   - pip:  `pip install spark-rapids-user-tools`
   - wheel-file: `pip install <wheel-file>`
   - from source: `pip install -e .`
-- verify the command is installed correctly by running
+- Verify the command is installed correctly by running
   ```bash
     spark_rapids_user_tools databricks-azure --help
   ```
@@ -47,8 +48,8 @@ Before running any command, you can set environment variables to specify configu
 - RAPIDS variables have a naming pattern `RAPIDS_USER_TOOLS_*`:
   - `RAPIDS_USER_TOOLS_CACHE_FOLDER`: specifies the location of a local directory that the RAPIDS-cli uses to store and cache the downloaded resources. The default is `/var/tmp/spark_rapids_user_tools_cache`.  Note that caching the resources locally has an impact on the total execution time of the command.
   - `RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY`: specifies the location of a local directory that the RAPIDS-cli uses to generate the output. The wrapper CLI arguments override that environment variable (`--local_folder` for Qualification).
-- For Databricks CLI, some environment variables can be set and picked by the RAPIDS-user tools such as: `DATABRICKS_CONFIG_FILE`, `DATABRICKS_HOST` and `DATABRICKS_TOKEN`. See the description of the variables in [Environment variables](https://docs.databricks.com/dev-tools/auth.html#environment-variables).
-- For Azure CLI, some environment variables can be set and picked by the RAPIDS-user tools such as: `AZURE_CONFIG_FILE` and `AZURE_DEFAULTS_LOCATION`.
+- For Databricks CLI, some environment variables can be set and picked up by the RAPIDS-user tools such as: `DATABRICKS_CONFIG_FILE`, `DATABRICKS_HOST` and `DATABRICKS_TOKEN`. See the description of the variables in [Environment variables](https://docs.databricks.com/dev-tools/auth.html#environment-variables).
+- For Azure CLI, some environment variables can be set and picked up by the RAPIDS-user tools such as: `AZURE_CONFIG_FILE` and `AZURE_DEFAULTS_LOCATION`.
 
 ## Qualification command
 
@@ -62,7 +63,7 @@ spark_rapids_user_tools databricks-azure qualification --help
 The local deployment runs on the local development machine. It requires:
 1. Installing and configuring the Databricks and Azure CLI
 2. Java 1.8+ development environment
-3. Internet access to download JAR dependencies from mvn: `spark-*.jar`, and `hadoop-azure-*.jar`
+3. Internet access to download JAR dependencies from mvn: `spark-*.jar` and `hadoop-azure-*.jar`
 4. Dependencies are cached on the local disk to reduce the overhead of the download.
 
 #### Command options
@@ -70,7 +71,7 @@ The local deployment runs on the local development machine. It requires:
 | Option                         | Description                                                                                                                                                                                                                                                                                                                                                                                                 | Default                                                                                                                                                                                                                                         | Required |
 |--------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:|
 | **cpu_cluster**                | The Databricks-cluster on which the Apache Spark applications were executed. Accepted values are an Databricks-cluster name, or a valid path to the cluster properties file (json format) generated by Databricks CLI command `databricks clusters get --cluster-name`                                                                                                                                      | N/A                                                                                                                                                                                                                                             |     Y    |
-| **eventlogs**                  | A comma seperated list of ABFS urls pointing to event logs or ABFS directory                                                                                                                                                                                                                                                                                                                                | Reads the Spark's property `spark.eventLog.dir` defined in `cpu_cluster`. This property should be included in the output of `databricks clusters get --cluster-name`. Note that the wrapper will raise an exception if the property is not set. |     N    |
+| **eventlogs**                  | A comma separated list of ABFS URLs pointing to event logs or ABFS directory, or local event log filenames or directory                                                                                                                                                                                                                                                                                     | Reads the Spark's property `spark.eventLog.dir` defined in `cpu_cluster`. This property should be included in the output of `databricks clusters get --cluster-name`. Note that the wrapper will raise an exception if the property is not set. |     N    |
 | **remote_folder**              | The ABFS folder where the output of the wrapper's output is copied. If missing, the output will be available only on local disk                                                                                                                                                                                                                                                                             | N/A                                                                                                                                                                                                                                             |     N    |
 | **gpu_cluster**                | The Databricks-cluster on which the Spark applications is planned to be migrated. The argument can be an Databricks-cluster or a valid path to the cluster's properties file (json format) generated by the Databricks CLI `databricks clusters get --cluster-name` command                                                                                                                                 | The wrapper maps the Azure machine instances of the original cluster into Azure instances that support GPU acceleration.                                                                                                                        |     N    |
 | **local_folder**               | Local work-directory path to store the output and to be used as root directory for temporary folders/files. The final output will go into a subdirectory named `qual-${EXEC_ID}` where `exec_id` is an auto-generated unique identifier of the execution.                                                                                                                                                   | If the argument is NONE, the default value is the env variable `RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY` if any; or the current working directory.                                                                                                   |     N    |

From a849c0dd79a9c8a5d970277968302bf14616d6d1 Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Thu, 13 Jul 2023 18:06:20 -0500
Subject: [PATCH 14/14] Bump build version to 23.06.2 (#439)

Signed-off-by: Ahmed Hussein (amahussein) <a@ahussein.me>
---
 core/pom.xml                                    | 2 +-
 user_tools/src/spark_rapids_pytools/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/pom.xml b/core/pom.xml
index 80638114d..2f319ba10 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -23,7 +23,7 @@
     <artifactId>rapids-4-spark-tools_2.12</artifactId>
     <name>RAPIDS Accelerator for Apache Spark tools</name>
     <description>RAPIDS Accelerator for Apache Spark tools</description>
-    <version>23.06.0-SNAPSHOT</version>
+    <version>23.06.2-SNAPSHOT</version>
     <packaging>jar</packaging>
     <url>http://github.com/NVIDIA/spark-rapids-tools</url>
 
diff --git a/user_tools/src/spark_rapids_pytools/__init__.py b/user_tools/src/spark_rapids_pytools/__init__.py
index 4f6412c0e..6272fbab9 100644
--- a/user_tools/src/spark_rapids_pytools/__init__.py
+++ b/user_tools/src/spark_rapids_pytools/__init__.py
@@ -16,5 +16,5 @@
 
 from spark_rapids_pytools.build import get_version
 
-VERSION = '23.06.0'
+VERSION = '23.06.2'
 __version__ = get_version(VERSION)