Update main branch post-23.12 release (#121)
* Fix library list (#118)

Remove nvfuser header

Co-authored-by: Misha Chornyi <mchornyi@nvidia.com>

* Remove nvfuser implementation (#119)

---------

Co-authored-by: Tanmay Verma <tanmay2592@gmail.com>
mc-nv and tanmayv25 authored Dec 26, 2023
1 parent 304c2e8 commit 48e2e29
Showing 4 changed files with 5 additions and 84 deletions.
CMakeLists.txt (2 changes: 0 additions & 2 deletions)

@@ -138,7 +138,6 @@ set(PT_LIBS
     "libtorch_cuda.so"
     "libtorch_cuda_linalg.so"
     "libtorch_global_deps.so"
-    "libnvfuser_codegen.so"
 )

@@ -214,7 +213,6 @@ if (${TRITON_PYTORCH_DOCKER_BUILD})
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_cpu.so libtorch_cpu.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_cuda.so libtorch_cuda.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_cuda_linalg.so libtorch_cuda_linalg.so
-    COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libnvfuser_codegen.so libnvfuser_codegen.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_global_deps.so libtorch_global_deps.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libcaffe2_nvrtc.so libcaffe2_nvrtc.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/libtorchvision.so libtorchvision.so
README.md (22 changes: 0 additions & 22 deletions)

@@ -144,26 +144,6 @@ key: "INFERENCE_MODE"
 }
 ```

-* `ENABLE_NVFUSER`: Boolean flag to enable the NvFuser (CUDA Graph
-Fuser) optimization for TorchScript models. If not specified, the
-default PyTorch fuser is used. If `ENABLE_NVFUSER` is specified, the
-`ENABLE_TENSOR_FUSER` configuration (see below) is ignored.
-
-Please note that in some models generated using trace in old PyTorch versions might not work
-correctly with NvFuser. We recommend using scripting and a recent version of PyTorch
-to generate these models.
-
-The section of model config file specifying this parameter will look like:
-
-```
-parameters: {
-  key: "ENABLE_NVFUSER"
-  value: {
-    string_value: "true"
-  }
-}
-```
-
 * `ENABLE_WEIGHT_SHARING`: Boolean flag to enable model instances on the same device to
 share weights. This optimization should not be used with stateful models. If not specified,
 weight sharing is disabled.

@@ -204,8 +184,6 @@ complex execution modes and dynamic shapes. If not specified, all are enabled by

 `ENABLE_JIT_PROFILING`

-`ENABLE_TENSOR_FUSER`
-
 ### Support

 #### Model Instance Group Kind
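The `ENABLE_WEIGHT_SHARING` parameter kept in the context above follows the same configuration pattern as the removed `ENABLE_NVFUSER` section: booleans are passed as string values in the model config's `parameters` block. A minimal sketch of enabling it, assuming the pbtxt convention shown elsewhere in this README:

```
parameters: {
  key: "ENABLE_WEIGHT_SHARING"
  value: {
    string_value: "true"
  }
}
```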
src/libtorch.cc (64 changes: 5 additions & 59 deletions)

@@ -98,10 +98,6 @@ class ModelState : public BackendModel {
     return enable_jit_executor_pair_;
   }
   bool EnabledInferenceMode() { return enable_inference_mode_; }
-  const std::pair<bool, bool>& EnabledNvfuserPair() const
-  {
-    return enable_nvfuser_pair_;
-  }
   bool EnabledCacheCleaning() { return enable_cache_cleaning_; }

   bool EnabledWeightSharing() { return enable_weight_sharing_; }

@@ -132,16 +128,11 @@ class ModelState : public BackendModel {

   // Flag pairs to indicate if various JIT settings are set and
   // enabled respectively. Defaults to (false, true). Default behavior
-  // is to do nothing if not explicitly set. Tensor fuser flag is
-  // ignore if nvfuser is explicitly set.
+  // is to do nothing if not explicitly set.
   std::pair<bool, bool> enable_tensor_fuser_pair_;
   std::pair<bool, bool> enable_jit_profiling_pair_;
   std::pair<bool, bool> enable_jit_executor_pair_;

-  // Flag pair to indicate whether nvfuser is set and enabled respectively.
-  // Defaults to (false, false).
-  std::pair<bool, bool> enable_nvfuser_pair_;
-
   // Model mapping for shared TorchScript model across all instances on the
   // same device. The key is a pair of isGPU and device index.
   std::map<

@@ -233,8 +224,7 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
       enable_inference_mode_(true), enable_cache_cleaning_(false),
       enable_weight_sharing_(false), enable_tensor_fuser_pair_({false, true}),
       enable_jit_profiling_pair_({false, true}),
-      enable_jit_executor_pair_({false, true}),
-      enable_nvfuser_pair_({false, false})
+      enable_jit_executor_pair_({false, true})
 {
 }

@@ -475,29 +465,6 @@ ModelState::ParseParameters()
             " for model instance '" + Name() + "'")
                .c_str());
     }
-
-    // If 'ENABLE_NVFUSER' is not present in 'parameters' then no
-    // update is made to 'enable_nvfuser'.
-    bool enable_nvfuser = false;
-    err = ParseParameter(params, "ENABLE_NVFUSER", &enable_nvfuser);
-    if (err != nullptr) {
-      if (TRITONSERVER_ErrorCode(err) != TRITONSERVER_ERROR_NOT_FOUND) {
-        return err;
-      } else {
-        LOG_MESSAGE(
-            TRITONSERVER_LOG_INFO, (std::string("NvFuser is not specified") +
-                                    " for model instance '" + Name() + "'")
-                                       .c_str());
-        TRITONSERVER_ErrorDelete(err);
-      }
-    } else {
-      enable_nvfuser_pair_ = {true, enable_nvfuser};
-      LOG_MESSAGE(
-          TRITONSERVER_LOG_INFO, (std::string("NvFuser is ") +
-                                  (enable_nvfuser ? "enabled" : "disabled") +
-                                  " for model instance '" + Name() + "'")
-                                     .c_str());
-    }
   }

   return nullptr;

@@ -1552,34 +1519,13 @@ ModelInstanceState::Execute(
         std::get<1>(model_state_->EnabledJitExecutor());
   }

-  // Fuser. Parameter is ignored if NVFuser parameter is explicitly
-  // set (either enabled or disabled). No change is made unless
-  // fuser is explicitly set in parameters.
-  if (!std::get<0>(model_state_->EnabledNvfuserPair()) &&
-      std::get<0>(model_state_->EnabledTensorExprFuser())) {
+  // Fuser. No change is made unless fuser is explicitly set in
+  // parameters.
+  if (std::get<0>(model_state_->EnabledTensorExprFuser())) {
     torch::jit::setTensorExprFuserEnabled(
         std::get<1>(model_state_->EnabledTensorExprFuser()));
   }

-  // NV-Fuser. No change is made unless parameter is explicitly set.
-  if (std::get<0>(model_state_->EnabledNvfuserPair())) {
-    bool is_device_gpu =
-        (device_.is_cuda() ||
-         ((Kind() == TRITONSERVER_INSTANCEGROUPKIND_MODEL) &&
-          (device_cnt_ > 0)));
-    if (std::get<1>(model_state_->EnabledNvfuserPair()) && is_device_gpu) {
-      torch::jit::overrideCanFuseOnCPU(false);
-      torch::jit::overrideCanFuseOnGPU(false);
-      torch::jit::setTensorExprFuserEnabled(false);
-      torch::jit::fuser::cuda::setEnabled(true);
-    } else {
-      torch::jit::overrideCanFuseOnCPU(true);
-      torch::jit::overrideCanFuseOnGPU(true);
-      torch::jit::setTensorExprFuserEnabled(true);
-      torch::jit::fuser::cuda::setEnabled(false);
-    }
-  }
-
   torch::NoGradGuard no_grad;

   // If input is a dictionary, prepare dictionary from 'input_tensors'.
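What survives in `Execute` is the backend's flag-pair pattern: the first element of each `std::pair<bool, bool>` records whether the parameter appeared in the model config, the second records its requested value, and a process-wide JIT setting is only touched when the parameter was explicitly set. A minimal standalone sketch of that pattern, assuming a libtorch build (the function name `ApplyTensorExprFuserSetting` is illustrative, not part of the backend):

```
// Sketch: apply a "set-ness aware" boolean setting, as the backend does for
// ENABLE_TENSOR_FUSER. Requires libtorch headers/libraries to build.
#include <torch/csrc/jit/passes/tensorexpr_fuser.h>

#include <utility>

void
ApplyTensorExprFuserSetting(const std::pair<bool, bool>& tensor_fuser_pair)
{
  // first == true only if ENABLE_TENSOR_FUSER was present in the model
  // config; otherwise the process-wide default is left untouched.
  if (tensor_fuser_pair.first) {
    torch::jit::setTensorExprFuserEnabled(tensor_fuser_pair.second);
  }
}
```

With nvfuser gone, this is the only fuser toggle the backend applies; the CPU/GPU fusion overrides (`torch::jit::overrideCanFuseOnCPU` / `overrideCanFuseOnGPU`) appeared only in the removed branch and are no longer called anywhere.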
src/libtorch_utils.h (1 change: 0 additions & 1 deletion)

@@ -35,7 +35,6 @@
 #pragma warning(push, 0)
 #include <torch/csrc/jit/codegen/cuda/interface.h>
 #include <torch/csrc/jit/codegen/fuser/interface.h>
-#include <torch/csrc/jit/passes/cuda_graph_fuser.h>
 #include <torch/csrc/jit/passes/tensorexpr_fuser.h>
 #include <torch/csrc/jit/runtime/graph_executor.h>
 #include <torch/script.h>  // One-stop header for TorchScript
