Skip to content

Commit

Permalink
Adds DeviceOptions provider
Browse files Browse the repository at this point in the history
Adds a `DeviceOptions` provider and updates the OptionsContext and OptionsProviders
to use `llvm::Error`s instead of `mlirtrt::Status` since the latter is not accessible
to the OptionsContext.
  • Loading branch information
pranavm-nvidia committed Dec 5, 2024
1 parent 97b13ec commit 78ce313
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 99 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#define MLIR_TENSORRT_COMPILER_CLIENT

#include "mlir-executor/Support/Status.h"
#include "mlir-tensorrt/Compiler/Options.h"
#include "mlir-tensorrt/Compiler/OptionsProviders.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/TypeID.h"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//===- Options.h ------------------------------------------------*- C++ -*-===//
//===- OptionsProviders.h ---------------------------------------*- C++ -*-===//
//
// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
// All rights reserved.
Expand Down Expand Up @@ -27,13 +27,21 @@
#include "mlir-tensorrt-dialect/Utils/Options.h"
#include "mlir/Support/LLVM.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include <string>

namespace mlirtrt::compiler {

/// Base class for groups of related compiler options. A subclass registers
/// its flags with an `OptionsContext` in `addToOptions` and may validate or
/// populate values after parsing in `finalize`.
struct OptionsProvider {
  // Polymorphic base: virtual destructor required for delete-through-base.
  // `= default` is the idiomatic spelling (keeps the type trivially
  // destructible where possible and satisfies clang-tidy modernize checks).
  virtual ~OptionsProvider() = default;

  /// Registers this provider's command-line options with `context`.
  virtual void addToOptions(mlir::OptionsContext &context) = 0;

  /// Hook invoked after option parsing. Subclasses may override to validate
  /// options or derive values (e.g. from the host). Default: success.
  virtual llvm::Error finalize() { return llvm::Error::success(); }
};

/// DebugOptions are options that are common to different compiler API
/// interfaces.
struct DebugOptions {
struct DebugOptions : public OptionsProvider {
/// A directory path where the IR will be dumped during compilation
/// using the `mlir-print-ir-tree-dir` mechanism.
std::string dumpIRPath = "";
Expand All @@ -48,7 +56,7 @@ struct DebugOptions {
/// `-debug-types=...` from the command line.
mlir::SmallVector<std::string> llvmDebugTypes = {};

void addToOptions(mlir::OptionsContext &context) {
void addToOptions(mlir::OptionsContext &context) override {
context.addOption("mlir-print-ir-tree-dir", dumpIRPath, llvm::cl::init(""));
context.addOption("debug", enableLLVMDebugFlag);
context.addList<std::string>("debug-only", llvmDebugTypes,
Expand All @@ -57,19 +65,57 @@ struct DebugOptions {
}
};

struct ExecutorOptions {
struct ExecutorOptions : public OptionsProvider {
/// The host index bit-width.
int64_t executorIndexBitwidth{64};

/// Whether to pass memref's as struct/table in function calls.
bool executorUsePackedMemRefCConv{true};

void addToOptions(mlir::OptionsContext &context) {
void addToOptions(mlir::OptionsContext &context) override {
context.addOption("executor-index-bitwidth", executorIndexBitwidth,
llvm::cl::init(64));
}
};

/// DeviceOptions describe the target CUDA device for compilation. Values may
/// be given explicitly on the command line or inferred from the host GPU in
/// `finalize()` when `shouldInferFromHost` is set.
struct DeviceOptions : public OptionsProvider {
  /// Target device compute capability (SM version) as a single number,
  /// e.g. 86 for SM 8.6. Initialized to match the `llvm::cl::init` default
  /// below so the member is never read uninitialized when `addToOptions`
  /// is not invoked.
  int64_t computeCapability{64};

  /// Target device max shared memory per block (kilobytes).
  int64_t maxSharedMemoryPerBlockKb{0};

  /// Target device maximum 4-byte registers per block.
  uint64_t maxRegistersPerBlock{0};

  void addToOptions(mlir::OptionsContext &context) override {
    context.addOption(
        "device-compute-capability", computeCapability, llvm::cl::init(64),
        llvm::cl::desc("Sets the device compute capability. Only relevant "
                       "if '--device-infer-from-host=false'"));
    context.addOption("device-max-shared-memory-per-block-kb",
                      maxSharedMemoryPerBlockKb, llvm::cl::init(0));
    context.addOption("device-max-registers-per-block", maxRegistersPerBlock,
                      llvm::cl::init(0));
    context.addOption("device-infer-from-host", shouldInferFromHost,
                      llvm::cl::init(true),
                      llvm::cl::desc("Infers device information from host"));
  }

  /// Populates device options from the host GPU when `shouldInferFromHost`
  /// is set; see OptionsProviders.cpp.
  llvm::Error finalize() override;

  // TODO (pranavm): This should ideally be private but needs to be set from
  // `populateStablehloClusteringPipelineOpts`.
  /// Whether to ignore `deviceX` options and instead infer them from the GPUs
  /// on the host system running the compilation.
  // NOTE(review): the in-class default (false) differs from the
  // `llvm::cl::init(true)` default used when options are registered —
  // confirm this divergence is intended for programmatic (non-CLI) use.
  bool shouldInferFromHost = false;

private:
  /// Infer target device information from the first visible CUDA device on the
  /// host executing this code.
  llvm::Error inferDeviceOptionsFromHost();
};

} // namespace mlirtrt::compiler

#endif // MLIR_TENSORRT_COMPILER_OPTIONS
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "mlir-tensorrt/Dialect/Plan/IR/Plan.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include <functional>

namespace mlirtrt::compiler {
Expand Down Expand Up @@ -71,14 +72,15 @@ optionsCreateFromArgs(const CompilerClient &client,
llvm::iterator_range(args), err);
}

// TODO: Figure out whether to add a method in the base class like
// "finalizeOptions" or a callback here, or something else if
// `inferDeviceOptionsFromHost` is unique to StableHLO.
//
// Populate device options from host information.
Status inferStatus = result->inferDeviceOptionsFromHost();
if (!inferStatus.isOk())
return inferStatus;
llvm::Error finalizeStatus = result->finalize();

std::optional<std::string> errMsg{};
llvm::handleAllErrors(
std::move(finalizeStatus),
[&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });

if (errMsg)
return getInternalErrorStatus(errMsg->c_str());

return std::unique_ptr<mlir::OptionsContext>(result.release());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#include "mlir-tensorrt-dialect/Utils/OptionsBundle.h"
#include "mlir-tensorrt/Compiler/Client.h"
#include "mlir-tensorrt/Compiler/Extension.h"
#include "mlir-tensorrt/Compiler/Options.h"
#include "mlir-tensorrt/Compiler/OptionsProviders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/TypeID.h"
Expand All @@ -52,40 +52,15 @@ namespace mlirtrt::compiler {
class StableHloToExecutableTask;

struct StableHLOToExecutableOptions
: public mlir::OptionsBundle<DebugOptions, ExecutorOptions> {
: public mlir::OptionsBundle<DebugOptions, ExecutorOptions, DeviceOptions> {
/// Initializes the options. The extensions in the provided registry
/// must be extensions for the StableHloToExecutable task.
StableHLOToExecutableOptions(TaskExtensionRegistry extensions);

/// Set the target device compute capability (SM version) and max shared
/// memory per block (in kilobytes). The `maxSharedMemoryPerBlockKb` is the
/// maximum shared memory per block allowed for kernels and is passed to the
/// TensorRT builder.
StableHLOToExecutableOptions &
setDeviceOptions(int64_t computeCapability,
int64_t maxSharedMemoryPerBlockKb);

/// Infer target device information from the first visible CUDA device on the
/// host executing this code.
Status inferDeviceOptionsFromHost();

/// Return the hash of the options. Returns `nullopt` when the TensorRT
/// layer metadata callback is set since that can't be reliably hashed.
std::optional<llvm::hash_code> getHash() const override;

/// Target device compute capability (SM version)
int64_t deviceComputeCapability;

/// Target device max shared memory per block (kilobytes)
int64_t deviceMaxSharedMemoryPerBlockKb;

/// Target device maximum 4-byte register sper block.
uint64_t deviceMaxRegistersPerBlock;

/// Whether to ignore `deviceX` options and instead infer them from the GPUs
/// on the host system running the compilation.
bool shouldInferDeviceOptionsFromHost = false;

/// Whether to disallow host tensors in TensorRT clusters.
bool disallowHostTensorsInTensorRTClusters = false;

Expand Down
26 changes: 18 additions & 8 deletions mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,15 @@ MTRT_Status mtrtStableHloToExecutableOptionsCreate(
auto result =
std::make_unique<StableHLOToExecutableOptions>(std::move(extensions));

/// Populate device options from host information.
Status inferStatus = result->inferDeviceOptionsFromHost();
if (!inferStatus.isOk())
return wrap(inferStatus);
llvm::Error finalizeStatus = result->finalize();

std::optional<std::string> errMsg{};
llvm::handleAllErrors(
std::move(finalizeStatus),
[&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });

if (errMsg)
return wrap(getInternalErrorStatus(errMsg->c_str()));

*options = wrap(result.release());
return mtrtStatusGetOk();
Expand Down Expand Up @@ -209,10 +214,15 @@ MTRT_Status mtrtStableHloToExecutableOptionsCreateFromArgs(
"failed to parse options string {0} due to error: {1}", line, err));
}

/// Populate device options from host information.
Status inferStatus = result->inferDeviceOptionsFromHost();
if (!inferStatus.isOk())
return wrap(inferStatus);
llvm::Error finalizeStatus = result->finalize();

std::optional<std::string> errMsg{};
llvm::handleAllErrors(
std::move(finalizeStatus),
[&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });

if (errMsg)
return wrap(getInternalErrorStatus(errMsg->c_str()));

*options = wrap(result.release());
return mtrtStatusGetOk();
Expand Down
1 change: 1 addition & 0 deletions mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ add_mlir_tensorrt_library(MLIRTensorRTCompilerClient
Client.cpp
Extension.cpp
OptionsRegistry.cpp
OptionsProviders.cpp
PARTIAL_SOURCES_INTENDED

LINK_LIBS PUBLIC
Expand Down
64 changes: 64 additions & 0 deletions mlir-tensorrt/compiler/lib/Compiler/OptionsProviders.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//===- OptionsProviders.cpp -------------------------------------*- C++ -*-===//
//
// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
// All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
///
/// Data structures and functions for manipulating compiler options.
///
//===----------------------------------------------------------------------===//
#include "mlir-tensorrt/Compiler/OptionsProviders.h"
#include "cuda_runtime_api.h"
#include "llvm/Support/Error.h"

// TODO (pranavm): Check if we can just reuse `DeviceInfo.cpp`?
// TODO (pranavm): Check if we can just reuse `DeviceInfo.cpp`?
/// Queries device 0 via the CUDA runtime and populates `computeCapability`,
/// `maxSharedMemoryPerBlockKb`, and `maxRegistersPerBlock`. Returns an error
/// if the device properties cannot be retrieved.
llvm::Error mlirtrt::compiler::DeviceOptions::inferDeviceOptionsFromHost() {
  cudaDeviceProp properties;
  cudaError_t err = cudaGetDeviceProperties(&properties, 0);
  if (err != cudaSuccess)
    return llvm::createStringError("failed to get cuda device properties");

  // `cudaDeviceProp` already carries the compute capability in its
  // `major`/`minor` fields, so the separate `cudaDeviceGetAttribute`
  // round-trips for cudaDevAttrComputeCapabilityMajor/Minor are redundant.
  // We want the SM version as a single number, e.g. 8.6 -> 86.
  this->computeCapability =
      static_cast<int64_t>(properties.major) * 10 + properties.minor;
  this->maxSharedMemoryPerBlockKb = properties.sharedMemPerBlock / 1024;
  this->maxRegistersPerBlock = properties.regsPerBlock;
  return llvm::Error::success();
}

/// Post-parse hook: when host inference is requested, overwrite the device
/// options with values queried from the first visible CUDA device; otherwise
/// leave the explicitly provided (or default) values untouched.
llvm::Error mlirtrt::compiler::DeviceOptions::finalize() {
  if (!shouldInferFromHost)
    return llvm::Error::success();
  // TODO (pranavm): How to check whether options were provided?
  // Does llvm::cl have a notion of mutually exclusive options like Python's
  // argparse?
  return inferDeviceOptionsFromHost();
}
57 changes: 7 additions & 50 deletions mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
#include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h"
#include "mlir-tensorrt/Compiler/Extension.h"
#include "mlir-tensorrt/Compiler/Options.h"
#include "mlir-tensorrt/Compiler/OptionsProviders.h"
#include "mlir-tensorrt/Compiler/OptionsRegistry.h"
#include "mlir-tensorrt/Compiler/TensorRTExtension/TensorRTExtension.h"
#include "mlir-tensorrt/Conversion/Passes.h"
Expand Down Expand Up @@ -163,56 +163,10 @@ StableHLOToExecutableOptions::StableHLOToExecutableOptions(
llvm::cl::desc("Don't allow TensorRt clusters to contain host tensor "
"calculations (but they can still be inputs)"));

addOption("device-compute-capability", deviceComputeCapability,
llvm::cl::init(64),
llvm::cl::desc("Sets the device compute capbility. Only relevant "
"if '--device-infer-from-host=false'"));
addOption("device-max-shared-memory-per-block-kb",
deviceMaxSharedMemoryPerBlockKb, llvm::cl::init(0));
addOption("device-max-registers-per-block", deviceMaxRegistersPerBlock,
llvm::cl::init(0));
addOption("device-infer-from-host", shouldInferDeviceOptionsFromHost,
llvm::cl::init(true),
llvm::cl::desc("Infers device information from host"));
addOption("entrypoint", entrypoint, llvm::cl::init("main"),
llvm::cl::desc("entrypoint function name"));
}

StableHLOToExecutableOptions &StableHLOToExecutableOptions::setDeviceOptions(
int64_t computeCapability, int64_t maxSharedMemoryPerBlockKb) {
deviceMaxSharedMemoryPerBlockKb = maxSharedMemoryPerBlockKb;
deviceComputeCapability = computeCapability;
return *this;
}

Status StableHLOToExecutableOptions::inferDeviceOptionsFromHost() {
cudaDeviceProp properties;
cudaError_t err = cudaGetDeviceProperties(&properties, 0);
if (err != cudaSuccess)
return getStatusWithMsg(StatusCode::InternalError,
"failed to get cuda device properties");

int ccMajor = 0;
int ccMinor = 0;
err = cudaDeviceGetAttribute(
&ccMajor, cudaDeviceAttr::cudaDevAttrComputeCapabilityMajor, 0);
if (err != cudaSuccess)
return getStatusWithMsg(StatusCode::InternalError,
"failed to get cuda device compute capability");
err = cudaDeviceGetAttribute(
&ccMinor, cudaDeviceAttr::cudaDevAttrComputeCapabilityMinor, 0);
if (err != cudaSuccess)
return getStatusWithMsg(StatusCode::InternalError,
"failed to get cuda device compute capability");

// We want SM version as a single number.
int64_t smVersion = ccMajor * 10 + ccMinor;
this->deviceComputeCapability = smVersion;
this->deviceMaxSharedMemoryPerBlockKb = properties.sharedMemPerBlock / 1024;
this->deviceMaxRegistersPerBlock = properties.regsPerBlock;
return Status::getOk();
}

std::optional<llvm::hash_code> StableHLOToExecutableOptions::getHash() const {
// If a callback is provided, we have no way of reliably hashing it.
if (layerMetadataCallback)
Expand Down Expand Up @@ -482,6 +436,7 @@ StableHloToExecutableTask::compileStableHLOToExecutable(
//===----------------------------------------------------------------------===//

namespace {
// TODO (pranavm): Any way to reuse `DeviceOptions` here?
struct ClusteringPipelineCliOpts
: public PassPipelineOptions<ClusteringPipelineCliOpts> {
Option<bool> lowerStablehloControlFlow{
Expand Down Expand Up @@ -514,11 +469,13 @@ static StableHLOToExecutableOptions populateStablehloClusteringPipelineOpts(
TaskExtensionRegistry extensions;
extensions.getOrCreateExtension<StableHLOToExecutableTensorRTExtension>();

// TODO (pranavm): Why is there this other way of setting up options?
StableHLOToExecutableOptions opts(std::move(extensions));
opts.deviceComputeCapability = cliOpts.deviceComputeCapability;
opts.deviceMaxSharedMemoryPerBlockKb =
opts.get<DeviceOptions>().computeCapability = cliOpts.deviceComputeCapability;
opts.get<DeviceOptions>().maxSharedMemoryPerBlockKb =
cliOpts.deviceMaxSharedMemoryPerBlockKb;
opts.shouldInferDeviceOptionsFromHost = cliOpts.inferDeviceOptionsFromHost;
opts.get<DeviceOptions>().shouldInferFromHost =
cliOpts.inferDeviceOptionsFromHost;
opts.entrypoint = cliOpts.entrypoint;
return opts;
}
Expand Down
Loading

0 comments on commit 78ce313

Please sign in to comment.