Skip to content

Commit

Permalink
Adds DeviceOptions provider
Browse files Browse the repository at this point in the history
Adds a `DeviceOptions` provider and updates the OptionsContext and OptionsProviders
to use `llvm::Error`s instead of `mlirtrt::Status` since the latter is not accessible
to the OptionsContext.
  • Loading branch information
pranavm-nvidia committed Dec 5, 2024
1 parent 97b13ec commit 78ce313
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 99 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#define MLIR_TENSORRT_COMPILER_CLIENT

#include "mlir-executor/Support/Status.h"
#include "mlir-tensorrt/Compiler/Options.h"
#include "mlir-tensorrt/Compiler/OptionsProviders.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/TypeID.h"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//===- Options.h ------------------------------------------------*- C++ -*-===//
//===- OptionsProviders.h ---------------------------------------*- C++ -*-===//
//
// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
// All rights reserved.
Expand Down Expand Up @@ -27,13 +27,21 @@
#include "mlir-tensorrt-dialect/Utils/Options.h"
#include "mlir/Support/LLVM.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include <string>

namespace mlirtrt::compiler {

/// Base class for groups of related compiler options. A subclass registers
/// its flags with an `OptionsContext` in `addToOptions` and may validate or
/// populate values after parsing in `finalize`.
struct OptionsProvider {
  // Polymorphic base: virtual destructor required for delete-through-base.
  // `= default` is the idiomatic spelling (keeps the type trivially
  // destructible where possible and satisfies clang-tidy modernize checks).
  virtual ~OptionsProvider() = default;

  /// Registers this provider's command-line options with `context`.
  virtual void addToOptions(mlir::OptionsContext &context) = 0;

  /// Hook invoked after option parsing. Subclasses may override to validate
  /// options or derive values (e.g. from the host). Default: success.
  virtual llvm::Error finalize() { return llvm::Error::success(); }
};

/// DebugOptions are options that are common to different compiler API
/// interfaces.
struct DebugOptions {
struct DebugOptions : public OptionsProvider {
/// A directory path where the IR will be dumped during compilation
/// using the `mlir-print-ir-tree-dir` mechanism.
std::string dumpIRPath = "";
Expand All @@ -48,7 +56,7 @@ struct DebugOptions {
/// `-debug-types=...` from the command line.
mlir::SmallVector<std::string> llvmDebugTypes = {};

void addToOptions(mlir::OptionsContext &context) {
void addToOptions(mlir::OptionsContext &context) override {
context.addOption("mlir-print-ir-tree-dir", dumpIRPath, llvm::cl::init(""));
context.addOption("debug", enableLLVMDebugFlag);
context.addList<std::string>("debug-only", llvmDebugTypes,
Expand All @@ -57,19 +65,57 @@ struct DebugOptions {
}
};

struct ExecutorOptions {
struct ExecutorOptions : public OptionsProvider {
/// The host index bit-width.
int64_t executorIndexBitwidth{64};

/// Whether to pass memref's as struct/table in function calls.
bool executorUsePackedMemRefCConv{true};

void addToOptions(mlir::OptionsContext &context) {
void addToOptions(mlir::OptionsContext &context) override {
context.addOption("executor-index-bitwidth", executorIndexBitwidth,
llvm::cl::init(64));
}
};

/// DeviceOptions describe the target CUDA device for compilation. Values may
/// be given explicitly on the command line or inferred from the host GPU in
/// `finalize()` when `shouldInferFromHost` is set.
struct DeviceOptions : public OptionsProvider {
  /// Target device compute capability (SM version) as a single number,
  /// e.g. 86 for SM 8.6. Initialized to match the `llvm::cl::init` default
  /// below so the member is never read uninitialized when `addToOptions`
  /// is not invoked.
  int64_t computeCapability{64};

  /// Target device max shared memory per block (kilobytes).
  int64_t maxSharedMemoryPerBlockKb{0};

  /// Target device maximum 4-byte registers per block.
  uint64_t maxRegistersPerBlock{0};

  void addToOptions(mlir::OptionsContext &context) override {
    context.addOption(
        "device-compute-capability", computeCapability, llvm::cl::init(64),
        llvm::cl::desc("Sets the device compute capability. Only relevant "
                       "if '--device-infer-from-host=false'"));
    context.addOption("device-max-shared-memory-per-block-kb",
                      maxSharedMemoryPerBlockKb, llvm::cl::init(0));
    context.addOption("device-max-registers-per-block", maxRegistersPerBlock,
                      llvm::cl::init(0));
    context.addOption("device-infer-from-host", shouldInferFromHost,
                      llvm::cl::init(true),
                      llvm::cl::desc("Infers device information from host"));
  }

  /// Populates device options from the host GPU when `shouldInferFromHost`
  /// is set; see OptionsProviders.cpp.
  llvm::Error finalize() override;

  // TODO (pranavm): This should ideally be private but needs to be set from
  // `populateStablehloClusteringPipelineOpts`.
  /// Whether to ignore `deviceX` options and instead infer them from the GPUs
  /// on the host system running the compilation.
  // NOTE(review): the in-class default (false) differs from the
  // `llvm::cl::init(true)` default used when options are registered —
  // confirm this divergence is intended for programmatic (non-CLI) use.
  bool shouldInferFromHost = false;

private:
  /// Infer target device information from the first visible CUDA device on the
  /// host executing this code.
  llvm::Error inferDeviceOptionsFromHost();
};

} // namespace mlirtrt::compiler

#endif // MLIR_TENSORRT_COMPILER_OPTIONS
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "mlir-tensorrt/Dialect/Plan/IR/Plan.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include <functional>

namespace mlirtrt::compiler {
Expand Down Expand Up @@ -71,14 +72,15 @@ optionsCreateFromArgs(const CompilerClient &client,
llvm::iterator_range(args), err);
}

// TODO: Figure out whether to add a method in the base class like
// "finalizeOptions" or a callback here, or something else if
// `inferDeviceOptionsFromHost` is unique to StableHLO.
//
// Populate device options from host information.
Status inferStatus = result->inferDeviceOptionsFromHost();
if (!inferStatus.isOk())
return inferStatus;
llvm::Error finalizeStatus = result->finalize();

std::optional<std::string> errMsg{};
llvm::handleAllErrors(
std::move(finalizeStatus),
[&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });

if (errMsg)
return getInternalErrorStatus(errMsg->c_str());

return std::unique_ptr<mlir::OptionsContext>(result.release());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#include "mlir-tensorrt-dialect/Utils/OptionsBundle.h"
#include "mlir-tensorrt/Compiler/Client.h"
#include "mlir-tensorrt/Compiler/Extension.h"
#include "mlir-tensorrt/Compiler/Options.h"
#include "mlir-tensorrt/Compiler/OptionsProviders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/TypeID.h"
Expand All @@ -52,40 +52,15 @@ namespace mlirtrt::compiler {
class StableHloToExecutableTask;

struct StableHLOToExecutableOptions
: public mlir::OptionsBundle<DebugOptions, ExecutorOptions> {
: public mlir::OptionsBundle<DebugOptions, ExecutorOptions, DeviceOptions> {
/// Initializes the options. The extensions in the provided registry
/// must be extensions for the StableHloToExecutable task.
StableHLOToExecutableOptions(TaskExtensionRegistry extensions);

/// Set the target device compute capability (SM version) and max shared
/// memory per block (in kilobytes). The `maxSharedMemoryPerBlockKb` is the
/// maximum shared memory per block allowed for kernels and is passed to the
/// TensorRT builder.
StableHLOToExecutableOptions &
setDeviceOptions(int64_t computeCapability,
int64_t maxSharedMemoryPerBlockKb);

/// Infer target device information from the first visible CUDA device on the
/// host executing this code.
Status inferDeviceOptionsFromHost();

/// Return the hash of the options. Returns `nullopt` when the TensorRT
/// layer metadata callback is set since that can't be reliably hashed.
std::optional<llvm::hash_code> getHash() const override;

/// Target device compute capability (SM version)
int64_t deviceComputeCapability;

/// Target device max shared memory per block (kilobytes)
int64_t deviceMaxSharedMemoryPerBlockKb;

/// Target device maximum 4-byte register sper block.
uint64_t deviceMaxRegistersPerBlock;

/// Whether to ignore `deviceX` options and instead infer them from the GPUs
/// on the host system running the compilation.
bool shouldInferDeviceOptionsFromHost = false;

/// Whether to disallow host tensors in TensorRT clusters.
bool disallowHostTensorsInTensorRTClusters = false;

Expand Down
26 changes: 18 additions & 8 deletions mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,15 @@ MTRT_Status mtrtStableHloToExecutableOptionsCreate(
auto result =
std::make_unique<StableHLOToExecutableOptions>(std::move(extensions));

/// Populate device options from host information.
Status inferStatus = result->inferDeviceOptionsFromHost();
if (!inferStatus.isOk())
return wrap(inferStatus);
llvm::Error finalizeStatus = result->finalize();

std::optional<std::string> errMsg{};
llvm::handleAllErrors(
std::move(finalizeStatus),
[&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });

if (errMsg)
return wrap(getInternalErrorStatus(errMsg->c_str()));

*options = wrap(result.release());
return mtrtStatusGetOk();
Expand Down Expand Up @@ -209,10 +214,15 @@ MTRT_Status mtrtStableHloToExecutableOptionsCreateFromArgs(
"failed to parse options string {0} due to error: {1}", line, err));
}

/// Populate device options from host information.
Status inferStatus = result->inferDeviceOptionsFromHost();
if (!inferStatus.isOk())
return wrap(inferStatus);
llvm::Error finalizeStatus = result->finalize();

std::optional<std::string> errMsg{};
llvm::handleAllErrors(
std::move(finalizeStatus),
[&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });

if (errMsg)
return wrap(getInternalErrorStatus(errMsg->c_str()));

*options = wrap(result.release());
return mtrtStatusGetOk();
Expand Down
1 change: 1 addition & 0 deletions mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ add_mlir_tensorrt_library(MLIRTensorRTCompilerClient
Client.cpp
Extension.cpp
OptionsRegistry.cpp
OptionsProviders.cpp
PARTIAL_SOURCES_INTENDED

LINK_LIBS PUBLIC
Expand Down
64 changes: 64 additions & 0 deletions mlir-tensorrt/compiler/lib/Compiler/OptionsProviders.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//===- OptionsProviders.cpp -------------------------------------*- C++ -*-===//
//
// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
// All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
///
/// Data structures and functions for manipulating compiler options.
///
//===----------------------------------------------------------------------===//
#include "mlir-tensorrt/Compiler/OptionsProviders.h"
#include "cuda_runtime_api.h"
#include "llvm/Support/Error.h"

// TODO (pranavm): Check if we can just reuse `DeviceInfo.cpp`?
// TODO (pranavm): Check if we can just reuse `DeviceInfo.cpp`?
/// Queries device 0 via the CUDA runtime and populates `computeCapability`,
/// `maxSharedMemoryPerBlockKb`, and `maxRegistersPerBlock`. Returns an error
/// if the device properties cannot be retrieved.
llvm::Error mlirtrt::compiler::DeviceOptions::inferDeviceOptionsFromHost() {
  cudaDeviceProp properties;
  cudaError_t err = cudaGetDeviceProperties(&properties, 0);
  if (err != cudaSuccess)
    return llvm::createStringError("failed to get cuda device properties");

  // `cudaDeviceProp` already carries the compute capability in its
  // `major`/`minor` fields, so the separate `cudaDeviceGetAttribute`
  // round-trips for cudaDevAttrComputeCapabilityMajor/Minor are redundant.
  // We want the SM version as a single number, e.g. 8.6 -> 86.
  this->computeCapability =
      static_cast<int64_t>(properties.major) * 10 + properties.minor;
  this->maxSharedMemoryPerBlockKb = properties.sharedMemPerBlock / 1024;
  this->maxRegistersPerBlock = properties.regsPerBlock;
  return llvm::Error::success();
}

/// Post-parse hook: when host inference is requested, overwrite the device
/// options with values queried from the first visible CUDA device; otherwise
/// leave the explicitly provided (or default) values untouched.
llvm::Error mlirtrt::compiler::DeviceOptions::finalize() {
  if (!shouldInferFromHost)
    return llvm::Error::success();
  // TODO (pranavm): How to check whether options were provided?
  // Does llvm::cl have a notion of mutually exclusive options like Python's
  // argparse?
  return inferDeviceOptionsFromHost();
}
57 changes: 7 additions & 50 deletions mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
#include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h"
#include "mlir-tensorrt/Compiler/Extension.h"
#include "mlir-tensorrt/Compiler/Options.h"
#include "mlir-tensorrt/Compiler/OptionsProviders.h"
#include "mlir-tensorrt/Compiler/OptionsRegistry.h"
#include "mlir-tensorrt/Compiler/TensorRTExtension/TensorRTExtension.h"
#include "mlir-tensorrt/Conversion/Passes.h"
Expand Down Expand Up @@ -163,56 +163,10 @@ StableHLOToExecutableOptions::StableHLOToExecutableOptions(
llvm::cl::desc("Don't allow TensorRt clusters to contain host tensor "
"calculations (but they can still be inputs)"));

addOption("device-compute-capability", deviceComputeCapability,
llvm::cl::init(64),
llvm::cl::desc("Sets the device compute capbility. Only relevant "
"if '--device-infer-from-host=false'"));
addOption("device-max-shared-memory-per-block-kb",
deviceMaxSharedMemoryPerBlockKb, llvm::cl::init(0));
addOption("device-max-registers-per-block", deviceMaxRegistersPerBlock,
llvm::cl::init(0));
addOption("device-infer-from-host", shouldInferDeviceOptionsFromHost,
llvm::cl::init(true),
llvm::cl::desc("Infers device information from host"));
addOption("entrypoint", entrypoint, llvm::cl::init("main"),
llvm::cl::desc("entrypoint function name"));
}

StableHLOToExecutableOptions &StableHLOToExecutableOptions::setDeviceOptions(
int64_t computeCapability, int64_t maxSharedMemoryPerBlockKb) {
deviceMaxSharedMemoryPerBlockKb = maxSharedMemoryPerBlockKb;
deviceComputeCapability = computeCapability;
return *this;
}

Status StableHLOToExecutableOptions::inferDeviceOptionsFromHost() {
cudaDeviceProp properties;
cudaError_t err = cudaGetDeviceProperties(&properties, 0);
if (err != cudaSuccess)
return getStatusWithMsg(StatusCode::InternalError,
"failed to get cuda device properties");

int ccMajor = 0;
int ccMinor = 0;
err = cudaDeviceGetAttribute(
&ccMajor, cudaDeviceAttr::cudaDevAttrComputeCapabilityMajor, 0);
if (err != cudaSuccess)
return getStatusWithMsg(StatusCode::InternalError,
"failed to get cuda device compute capability");
err = cudaDeviceGetAttribute(
&ccMinor, cudaDeviceAttr::cudaDevAttrComputeCapabilityMinor, 0);
if (err != cudaSuccess)
return getStatusWithMsg(StatusCode::InternalError,
"failed to get cuda device compute capability");

// We want SM version as a single number.
int64_t smVersion = ccMajor * 10 + ccMinor;
this->deviceComputeCapability = smVersion;
this->deviceMaxSharedMemoryPerBlockKb = properties.sharedMemPerBlock / 1024;
this->deviceMaxRegistersPerBlock = properties.regsPerBlock;
return Status::getOk();
}

std::optional<llvm::hash_code> StableHLOToExecutableOptions::getHash() const {
// If a callback is provided, we have no way of reliably hashing it.
if (layerMetadataCallback)
Expand Down Expand Up @@ -482,6 +436,7 @@ StableHloToExecutableTask::compileStableHLOToExecutable(
//===----------------------------------------------------------------------===//

namespace {
// TODO (pranavm): Any way to reuse `DeviceOptions` here?
struct ClusteringPipelineCliOpts
: public PassPipelineOptions<ClusteringPipelineCliOpts> {
Option<bool> lowerStablehloControlFlow{
Expand Down Expand Up @@ -514,11 +469,13 @@ static StableHLOToExecutableOptions populateStablehloClusteringPipelineOpts(
TaskExtensionRegistry extensions;
extensions.getOrCreateExtension<StableHLOToExecutableTensorRTExtension>();

// TODO (pranavm): Why is there this other way of setting up options?
StableHLOToExecutableOptions opts(std::move(extensions));
opts.deviceComputeCapability = cliOpts.deviceComputeCapability;
opts.deviceMaxSharedMemoryPerBlockKb =
opts.get<DeviceOptions>().computeCapability = cliOpts.deviceComputeCapability;
opts.get<DeviceOptions>().maxSharedMemoryPerBlockKb =
cliOpts.deviceMaxSharedMemoryPerBlockKb;
opts.shouldInferDeviceOptionsFromHost = cliOpts.inferDeviceOptionsFromHost;
opts.get<DeviceOptions>().shouldInferFromHost =
cliOpts.inferDeviceOptionsFromHost;
opts.entrypoint = cliOpts.entrypoint;
return opts;
}
Expand Down
Loading

0 comments on commit 78ce313

Please sign in to comment.