diff --git a/README.md b/README.md
index 22ef270..2c14421 100644
--- a/README.md
+++ b/README.md
@@ -144,26 +144,6 @@ key: "INFERENCE_MODE"
 }
 ```
 
-* `ENABLE_NVFUSER`: Boolean flag to enable the NvFuser (CUDA Graph
-Fuser) optimization for TorchScript models. If not specified, the
-default PyTorch fuser is used. If `ENABLE_NVFUSER` is specified, the
-`ENABLE_TENSOR_FUSER` configuration (see below) is ignored.
-
-Please note that some models generated using trace in old PyTorch versions might not work
-correctly with NvFuser. We recommend using scripting and a recent version of PyTorch
-to generate these models.
-
-The section of model config file specifying this parameter will look like:
-
-```
-parameters: {
-key: "ENABLE_NVFUSER"
-    value: {
-    string_value: "true"
-    }
-}
-```
-
 * `ENABLE_WEIGHT_SHARING`: Boolean flag to enable model instances on the same device to
 share weights. This optimization should not be used with stateful models. If not specified,
 weight sharing is disabled.
@@ -204,8 +184,6 @@ complex execution modes and dynamic shapes. If not specified, all are enabled by
 
 * `ENABLE_JIT_PROFILING`
 
-* `ENABLE_TENSOR_FUSER`
-
 ### Support
 
 #### Model Instance Group Kind
diff --git a/src/libtorch.cc b/src/libtorch.cc
index 2731094..472e19d 100644
--- a/src/libtorch.cc
+++ b/src/libtorch.cc
@@ -98,10 +98,6 @@ class ModelState : public BackendModel {
     return enable_jit_executor_pair_;
   }
   bool EnabledInferenceMode() { return enable_inference_mode_; }
-  const std::pair<bool, bool>& EnabledNvfuserPair() const
-  {
-    return enable_nvfuser_pair_;
-  }
   bool EnabledCacheCleaning() { return enable_cache_cleaning_; }
   bool EnabledWeightSharing() { return enable_weight_sharing_; }
 
@@ -132,16 +128,11 @@ class ModelState : public BackendModel {
 
   // Flag pairs to indicate if various JIT settings are set and
   // enabled respectively. Defaults to (false, true). Default behavior
-  // is to do nothing if not explicitly set. Tensor fuser flag is
-  // ignored if nvfuser is explicitly set.
+  // is to do nothing if not explicitly set.
   std::pair<bool, bool> enable_tensor_fuser_pair_;
   std::pair<bool, bool> enable_jit_profiling_pair_;
   std::pair<bool, bool> enable_jit_executor_pair_;
 
-  // Flag pair to indicate whether nvfuser is set and enabled respectively.
-  // Defaults to (false, false).
-  std::pair<bool, bool> enable_nvfuser_pair_;
-
   // Model mapping for shared TorchScript model across all instances on the
   // same device. The key is a pair of isGPU and device index.
   std::map<
@@ -233,8 +224,7 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
       enable_inference_mode_(true), enable_cache_cleaning_(false),
       enable_weight_sharing_(false), enable_tensor_fuser_pair_({false, true}),
       enable_jit_profiling_pair_({false, true}),
-      enable_jit_executor_pair_({false, true}),
-      enable_nvfuser_pair_({false, false})
+      enable_jit_executor_pair_({false, true})
 {
 }
 
@@ -475,29 +465,6 @@ ModelState::ParseParameters()
            " for model instance '" + Name() + "'")
               .c_str());
     }
-
-    // If 'ENABLE_NVFUSER' is not present in 'parameters' then no
-    // update is made to 'enable_nvfuser'.
-    bool enable_nvfuser = false;
-    err = ParseParameter(params, "ENABLE_NVFUSER", &enable_nvfuser);
-    if (err != nullptr) {
-      if (TRITONSERVER_ErrorCode(err) != TRITONSERVER_ERROR_NOT_FOUND) {
-        return err;
-      } else {
-        LOG_MESSAGE(
-            TRITONSERVER_LOG_INFO, (std::string("NvFuser is not specified") +
-                                    " for model instance '" + Name() + "'")
-                                       .c_str());
-        TRITONSERVER_ErrorDelete(err);
-      }
-    } else {
-      enable_nvfuser_pair_ = {true, enable_nvfuser};
-      LOG_MESSAGE(
-          TRITONSERVER_LOG_INFO, (std::string("NvFuser is ") +
-                                  (enable_nvfuser ? "enabled" : "disabled") +
-                                  " for model instance '" + Name() + "'")
-                                     .c_str());
-    }
   }
 
   return nullptr;
@@ -1552,34 +1519,13 @@ ModelInstanceState::Execute(
         std::get<1>(model_state_->EnabledJitExecutor());
   }
 
-  // Fuser. Parameter is ignored if NVFuser parameter is explicitly
-  // set (either enabled or disabled). No change is made unless
-  // fuser is explicitly set in parameters.
-  if (!std::get<0>(model_state_->EnabledNvfuserPair()) &&
-      std::get<0>(model_state_->EnabledTensorExprFuser())) {
+  // Fuser. No change is made unless fuser is explicitly set in
+  // parameters.
+  if (std::get<0>(model_state_->EnabledTensorExprFuser())) {
     torch::jit::setTensorExprFuserEnabled(
         std::get<1>(model_state_->EnabledTensorExprFuser()));
   }
 
-  // NV-Fuser. No change is made unless parameter is explicitly set.
-  if (std::get<0>(model_state_->EnabledNvfuserPair())) {
-    bool is_device_gpu =
-        (device_.is_cuda() ||
-         ((Kind() == TRITONSERVER_INSTANCEGROUPKIND_MODEL) &&
-          (device_cnt_ > 0)));
-    if (std::get<1>(model_state_->EnabledNvfuserPair()) && is_device_gpu) {
-      torch::jit::overrideCanFuseOnCPU(false);
-      torch::jit::overrideCanFuseOnGPU(false);
-      torch::jit::setTensorExprFuserEnabled(false);
-      torch::jit::fuser::cuda::setEnabled(true);
-    } else {
-      torch::jit::overrideCanFuseOnCPU(true);
-      torch::jit::overrideCanFuseOnGPU(true);
-      torch::jit::setTensorExprFuserEnabled(true);
-      torch::jit::fuser::cuda::setEnabled(false);
-    }
-  }
-
   torch::NoGradGuard no_grad;
 
   // If input is a dictionary, prepare dictionary from 'input_tensors'.
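
Editor's note on the surviving code path: after this patch, `ModelInstanceState::Execute()` touches only PyTorch's tensor-expression fuser, and only when `ENABLE_TENSOR_FUSER` was explicitly set in the model configuration. Below is a minimal sketch of that behavior, assuming a LibTorch build that ships `torch/csrc/jit/passes/tensorexpr_fuser.h`; the helper name `ApplyFuserSetting` is illustrative and not part of the patch.

```
#include <torch/csrc/jit/passes/tensorexpr_fuser.h>

#include <utility>

// (is_set, is_enabled): 'first' records whether ENABLE_TENSOR_FUSER was
// present in the model config, 'second' carries its parsed boolean value.
// Mirrors the flag-pair convention used by ModelState.
void
ApplyFuserSetting(const std::pair<bool, bool>& tensor_fuser)
{
  // Leave PyTorch's global fuser state alone unless the parameter was
  // explicitly set; there is no NvFuser special case any more.
  if (tensor_fuser.first) {
    torch::jit::setTensorExprFuserEnabled(tensor_fuser.second);
  }
}
```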
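For model configurations that previously set `ENABLE_NVFUSER`, the parameter should simply be removed; the tensor-expression fuser remains controllable through `ENABLE_TENSOR_FUSER`. Mirroring the style of the other parameter examples in the README, a `config.pbtxt` entry would look like the following (an example in the README's existing format, not text taken from the patch):

```
parameters: {
key: "ENABLE_TENSOR_FUSER"
    value: {
    string_value: "false"
    }
}
```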