diff --git a/core/conversion/conversion.cpp b/core/conversion/conversion.cpp index 1c2b9632cb..e30a1a56f2 100644 --- a/core/conversion/conversion.cpp +++ b/core/conversion/conversion.cpp @@ -145,7 +145,7 @@ void AddInputs(ConversionCtx* ctx, at::ArrayRef inputs ss << " " << i << ","; } ss << ']'; - LOG_DEBUG(ss.str()); + LOG_DEBUG(ctx->logger, ss.str()); TRTORCH_CHECK( input_tensors.size() == input_specs.size(), diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp index 50831a803b..8a1728daa3 100644 --- a/core/conversion/conversionctx/ConversionCtx.cpp +++ b/core/conversion/conversionctx/ConversionCtx.cpp @@ -75,15 +75,15 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) cfg->setInt8Calibrator(settings.calibrator); break; case nvinfer1::DataType::kFLOAT: + break; case nvinfer1::DataType::kINT32: case nvinfer1::DataType::kBOOL: default: - break; + TRTORCH_THROW_ERROR("Requested kernel precision that is unsupported: " << *p << " options are float, half, int8"); } } enabled_precisions = settings.enabled_precisions; - input_dtypes = settings.input_dtypes; if (settings.disable_tf32) { cfg->clearFlag(nvinfer1::BuilderFlag::kTF32); diff --git a/core/conversion/conversionctx/ConversionCtx.h b/core/conversion/conversionctx/ConversionCtx.h index 0570bf599b..1ddec22c89 100644 --- a/core/conversion/conversionctx/ConversionCtx.h +++ b/core/conversion/conversionctx/ConversionCtx.h @@ -59,7 +59,6 @@ struct ConversionCtx { nvinfer1::IBuilder* builder; nvinfer1::INetworkDefinition* net; nvinfer1::IBuilderConfig* cfg; - std::vector input_dtypes; std::set enabled_precisions; BuilderSettings settings; util::logging::TRTorchLogger logger; diff --git a/core/ir/Input.cpp b/core/ir/Input.cpp index 4f3c2c9fa1..3ce0a339ad 100644 --- a/core/ir/Input.cpp +++ b/core/ir/Input.cpp @@ -5,59 +5,6 @@ namespace trtorch { namespace core { namespace ir { -// Input(std::vector shape) { -// if (d.size() > 5) { -// 
LOG_WARNING("Verify that this dim size is accepted"); -// } - -// opt = util::toDims(d); -// min = util::toDims(d); -// max = util::toDims(d); -// input_shape = util::toDims(d); -// input_is_dynamic = false; -// format = nvinfer1::TensorFormat::kLINEAR; -// dtype = nvinfer1::DataType::kFLOAT; -// } - -// Input(std::vector min_shape, std::vector opt_shape, std::vector max_shape) { -// if (min_shape.size() > 5 || opt_shape.size() > 5 || max_shape.size() > 5) { -// LOG_WARNING("Verify that this dim size is accepted"); -// } - -// std::set sizes; -// sizes.insert(min_shape.size()); -// sizes.insert(opt_shape.size()); -// sizes.insert(max_shape.size()); - -// if (sizes.size() != 1) { -// LOG_ERROR( -// "Expected all input sizes have the same dimensions, but found dimensions: min(" -// << min_shape.size() << "), opt(" << opt_shape.size() << "), max(" << max_shape.size() << ")"); -// } - -// min = util::toDims(min_shape); -// opt = util::toDims(opt_shape); -// max = util::toDims(max_shape); -// format = nvinfer1::TensorFormat::kLINEAR; -// dtype = nvinfer1::DataType::kFLOAT; - -// std::vector dyn_shape; -// for (size_t i = 0; i < opt_shape.size(); i++) { -// std::set dim; -// dim.insert(min_shape[i]); -// dim.insert(opt_shape[i]); -// dim.insert(max_shape[i]); -// if (dim.size() != 1) { -// dyn_shape.push_back(-1); -// input_is_dynamic = true; -// } else { -// dyn_shape.push_back(opt_shape[i]); -// } -// } - -// input_shape = util::toDims(dyn_shape); -// } - bool valid_dtype_format_combo(nvinfer1::DataType dtype, nvinfer1::TensorFormat format) { switch (dtype) { case nvinfer1::DataType::kINT8: // Supports just Linear (NCHW) @@ -170,7 +117,6 @@ Input::Input( dim.insert(min_shape[i]); dim.insert(opt_shape[i]); dim.insert(max_shape[i]); - LOG_DEBUG(dim.size()); if (dim.size() != 1) { dyn_shape.push_back(-1); input_is_dynamic = true; diff --git a/cpp/api/include/trtorch/trtorch.h b/cpp/api/include/trtorch/trtorch.h index e0fd531e80..31ca4f95b9 100644 --- 
a/cpp/api/include/trtorch/trtorch.h +++ b/cpp/api/include/trtorch/trtorch.h @@ -703,8 +703,8 @@ struct TRTORCH_API CompileSpec { bool truncate_long_and_double = false; /** - * Restrict operating type to only set default operation precision - * (op_precision) + * Restrict operating type to only the lowest enabled operation precision + * (enabled_precisions) */ bool strict_types = false; diff --git a/cpp/api/src/compile_spec.cpp b/cpp/api/src/compile_spec.cpp index 5e44c6b52f..d1b88f9638 100644 --- a/cpp/api/src/compile_spec.cpp +++ b/cpp/api/src/compile_spec.cpp @@ -73,6 +73,7 @@ std::ostream& operator<<(std::ostream& os, const CompileSpec::Input& input) { } nvinfer1::DataType toTRTDataType(CompileSpec::DataType value) { + TRTORCH_CHECK(!(value == CompileSpec::DataType::kUnknown), "Data type is unknown"); switch (value) { case CompileSpec::DataType::kChar: return nvinfer1::DataType::kINT8; @@ -89,6 +90,7 @@ nvinfer1::DataType toTRTDataType(CompileSpec::DataType value) { } nvinfer1::TensorFormat toTRTTensorFormat(CompileSpec::TensorFormat value) { + TRTORCH_CHECK(!(value == CompileSpec::TensorFormat::kUnknown), "Tensor format is unknown"); switch (value) { case CompileSpec::TensorFormat::kChannelsLast: return nvinfer1::TensorFormat::kHWC; @@ -101,7 +103,7 @@ nvinfer1::TensorFormat toTRTTensorFormat(CompileSpec::TensorFormat value) { CompileSpec::DataType::DataType(c10::ScalarType t) { TRTORCH_CHECK( t == at::kHalf || t == at::kFloat || t == at::kChar || t == at::kInt || t == at::kBool, - "Data type is unsupported"); + "Data type is unsupported (" << t << ")"); switch (t) { case at::kHalf: value = DataType::kHalf; @@ -124,7 +126,7 @@ CompileSpec::DataType::DataType(c10::ScalarType t) { CompileSpec::TensorFormat::TensorFormat(at::MemoryFormat t) { TRTORCH_CHECK( - t == at::MemoryFormat::Contiguous || t == at::MemoryFormat::ChannelsLast, "Tensor format is unsupported"); + t == at::MemoryFormat::Contiguous || t == at::MemoryFormat::ChannelsLast, "Tensor format is 
unsupported (" << t << ")"); switch (t) { case at::MemoryFormat::ChannelsLast: @@ -325,14 +327,16 @@ core::runtime::CudaDevice to_internal_cuda_device(CompileSpec::Device device) { core::CompileSpec to_internal_compile_spec(CompileSpec external) { core::CompileSpec internal(to_vec_internal_inputs(external.inputs)); - if (external.input_ranges.size() > 0) { + if (external.input_ranges.size() > 0 && external.inputs.size() > 0) { + TRTORCH_THROW_ERROR("Saw both input specs listed for inputs and input_ranges in CompileSpec. input_ranges is deprecated and will be removed in v0.5.0. Please port forward to using inputs"); + } else if (external.input_ranges.size() > 0) { internal = core::CompileSpec(to_vec_internal_inputs(external.input_ranges)); } else { TRTORCH_CHECK(external.inputs.size() > 0, "Compilation requires at least one input specification"); internal = core::CompileSpec(to_vec_internal_inputs(external.inputs)); } - if (external.enabled_precisions.size() <= 1 && + if (external.enabled_precisions.size() == 1 && toTRTDataType(*external.enabled_precisions.begin()) == nvinfer1::DataType::kFLOAT && toTRTDataType(external.op_precision) != nvinfer1::DataType::kFLOAT) { internal.convert_info.engine_settings.enabled_precisions.insert(toTRTDataType(external.op_precision)); diff --git a/cpp/benchmark/main.cpp b/cpp/benchmark/main.cpp index 3089a5fdab..ac5a823247 100644 --- a/cpp/benchmark/main.cpp +++ b/cpp/benchmark/main.cpp @@ -124,7 +124,7 @@ int main(int argc, const char* argv[]) { compile_spec.workspace_size = 1 << 20; #ifdef HALF - compile_spec.op_precision = torch::kF16; + compile_spec.enabled_precisions.insert(torch::kF16); #endif auto trt_mod = trtorch::CompileGraph(mod, compile_spec); diff --git a/cpp/ptq/README.md b/cpp/ptq/README.md index ceffb6dcec..7cb179cd64 100644 --- a/cpp/ptq/README.md +++ b/cpp/ptq/README.md @@ -98,8 +98,8 @@ Then all thats required to setup the module for INT8 calibration is to set the f std::vector> input_shape = {{32, 3, 32, 32}}; /// 
Configure settings for compilation auto compile_spec = trtorch::CompileSpec({input_shape}); - /// Set operating precision to INT8 - compile_spec.op_precision = torch::kI8; + /// Enable INT8 precision + compile_spec.enabled_precisions.insert(torch::kI8); /// Use the TensorRT Entropy Calibrator compile_spec.ptq_calibrator = calibrator; /// Set a larger workspace (you may get better performace from doing so) diff --git a/cpp/ptq/main.cpp b/cpp/ptq/main.cpp index b93cb6c471..4c431a25ff 100644 --- a/cpp/ptq/main.cpp +++ b/cpp/ptq/main.cpp @@ -46,7 +46,7 @@ torch::jit::Module compile_int8_model(const std::string& data_dir, torch::jit::M /// Configure settings for compilation auto compile_spec = trtorch::CompileSpec({input_shape}); /// Set operating precision to INT8 - compile_spec.op_precision = torch::kI8; + compile_spec.enabled_precisions.insert(torch::kI8); /// Use the TensorRT Entropy Calibrator compile_spec.ptq_calibrator = calibrator; /// Set max batch size for the engine diff --git a/tests/accuracy/test_dla_fp16_accuracy.cpp b/tests/accuracy/test_dla_fp16_accuracy.cpp index bb45efe479..d74e7e6836 100644 --- a/tests/accuracy/test_dla_fp16_accuracy.cpp +++ b/tests/accuracy/test_dla_fp16_accuracy.cpp @@ -27,7 +27,7 @@ TEST_P(AccuracyTests, DLAFP16AccuracyIsClose) { std::vector> input_shape = {{32, 3, 32, 32}}; auto compile_spec = trtorch::CompileSpec({input_shape}); - compile_spec.op_precision = torch::kF16; + compile_spec.enabled_precisions.insert(torch::kF16); compile_spec.device.device_type = trtorch::CompileSpec::Device::DeviceType::kDLA; compile_spec.device.gpu_id = 0; compile_spec.device.dla_core = 1; diff --git a/tests/accuracy/test_dla_int8_accuracy.cpp b/tests/accuracy/test_dla_int8_accuracy.cpp index d4908ef3c3..63963d4521 100644 --- a/tests/accuracy/test_dla_int8_accuracy.cpp +++ b/tests/accuracy/test_dla_int8_accuracy.cpp @@ -21,7 +21,7 @@ TEST_P(AccuracyTests, DLAINT8AccuracyIsClose) { // Configure settings for compilation auto compile_spec =
trtorch::CompileSpec({input_shape}); // Set operating precision to INT8 - compile_spec.op_precision = torch::kI8; + compile_spec.enabled_precisions.insert(torch::kI8); // Use the TensorRT Entropy Calibrator compile_spec.ptq_calibrator = calibrator; // Set max batch size for the engine diff --git a/tests/accuracy/test_fp16_accuracy.cpp b/tests/accuracy/test_fp16_accuracy.cpp index 885bc5529d..abc1f15356 100644 --- a/tests/accuracy/test_fp16_accuracy.cpp +++ b/tests/accuracy/test_fp16_accuracy.cpp @@ -27,7 +27,7 @@ TEST_P(AccuracyTests, FP16AccuracyIsClose) { std::vector> input_shape = {{32, 3, 32, 32}}; auto compile_spec = trtorch::CompileSpec({input_shape}); - compile_spec.op_precision = torch::kF16; + compile_spec.enabled_precisions.insert(torch::kF16); auto trt_mod = trtorch::CompileGraph(mod, compile_spec); diff --git a/tests/accuracy/test_fp32_accuracy.cpp b/tests/accuracy/test_fp32_accuracy.cpp index f5ee648cad..7be4b7bf6d 100644 --- a/tests/accuracy/test_fp32_accuracy.cpp +++ b/tests/accuracy/test_fp32_accuracy.cpp @@ -27,7 +27,7 @@ TEST_P(AccuracyTests, FP32AccuracyIsClose) { std::vector> input_shape = {{32, 3, 32, 32}}; auto compile_spec = trtorch::CompileSpec({input_shape}); - compile_spec.op_precision = torch::kF32; + compile_spec.enabled_precisions.insert(torch::kF32); auto trt_mod = trtorch::CompileGraph(mod, compile_spec); diff --git a/tests/accuracy/test_int8_accuracy.cpp b/tests/accuracy/test_int8_accuracy.cpp index 0366fe2e03..e3b91cf4d0 100644 --- a/tests/accuracy/test_int8_accuracy.cpp +++ b/tests/accuracy/test_int8_accuracy.cpp @@ -21,7 +21,7 @@ TEST_P(AccuracyTests, INT8AccuracyIsClose) { // Configure settings for compilation auto compile_spec = trtorch::CompileSpec({input_shape}); // Set operating precision to INT8 - compile_spec.op_precision = torch::kI8; + compile_spec.enabled_precisions.insert(torch::kI8); // Use the TensorRT Entropy Calibrator compile_spec.ptq_calibrator = calibrator; // Set max batch size for the engine diff --git 
a/tests/cpp/test_default_input_types.cpp b/tests/cpp/test_default_input_types.cpp index fa814fdcd9..1522126791 100644 --- a/tests/cpp/test_default_input_types.cpp +++ b/tests/cpp/test_default_input_types.cpp @@ -25,4 +25,4 @@ TEST_P(CppAPITests, InputsUseDefault) { INSTANTIATE_TEST_SUITE_P( CompiledModuleForwardIsCloseSuite, CppAPITests, - testing::Values(PathAndInSize({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}}))); + testing::Values(PathAndInSize({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, 2e-5}))); diff --git a/tests/py/test_api_dla.py b/tests/py/test_api_dla.py index 312a043061..d0f244c6c1 100644 --- a/tests/py/test_api_dla.py +++ b/tests/py/test_api_dla.py @@ -30,14 +30,14 @@ def setUp(self): def test_compile_traced(self): compile_spec = { - "input_shapes": [self.input.shape], + "inputs": [trtorch.Input(self.input.shape)], "device": { "device_type": trtorch.DeviceType.DLA, "gpu_id": 0, "dla_core": 0, "allow_gpu_fallback": True }, - "op_precision": torch.half + "enabled_precisions": {torch.float, torch.half} } trt_mod = trtorch.compile(self.traced_model, compile_spec) @@ -46,14 +46,14 @@ def test_compile_traced(self): def test_compile_script(self): compile_spec = { - "input_shapes": [self.input.shape], + "inputs": [trtorch.Input(self.input.shape)], "device": { "device_type": trtorch.DeviceType.DLA, "gpu_id": 0, "dla_core": 0, "allow_gpu_fallback": True }, - "op_precision": torch.half + "enabled_precisions": {torch.float, torch.half} } trt_mod = trtorch.compile(self.scripted_model, compile_spec) diff --git a/tests/py/test_multi_gpu.py b/tests/py/test_multi_gpu.py index ca044afb99..d3aabb1f25 100644 --- a/tests/py/test_multi_gpu.py +++ b/tests/py/test_multi_gpu.py @@ -22,7 +22,7 @@ def setUp(self): def test_compile_traced(self): trtorch.set_device(0) compile_spec = { - "input_shapes": [self.input.shape], + "inputs": [trtorch.Input(self.input.shape)], "device": { "device_type": trtorch.DeviceType.GPU, "gpu_id": self.target_gpu, @@ 
-41,7 +41,7 @@ def test_compile_traced(self): def test_compile_script(self): trtorch.set_device(0) compile_spec = { - "input_shapes": [self.input.shape], + "inputs": [trtorch.Input(self.input.shape)], "device": { "device_type": trtorch.DeviceType.GPU, "gpu_id": self.target_gpu, @@ -74,7 +74,7 @@ def setUp(self): def test_compile_traced(self): trtorch.set_device(0) compile_spec = { - "input_shapes": [self.input.shape], + "inputs": [trtorch.Input(self.input.shape)], "device": { "device_type": trtorch.DeviceType.GPU, "gpu_id": self.target_gpu, @@ -93,7 +93,7 @@ def test_compile_traced(self): def test_compile_script(self): trtorch.set_device(0) compile_spec = { - "input_shapes": [self.input.shape], + "inputs": [trtorch.Input(self.input.shape)], "device": { "device_type": trtorch.DeviceType.GPU, "gpu_id": self.target_gpu, diff --git a/tests/py/test_ptq_dataloader_calibrator.py b/tests/py/test_ptq_dataloader_calibrator.py index a22aeef3b9..7349a58f70 100644 --- a/tests/py/test_ptq_dataloader_calibrator.py +++ b/tests/py/test_ptq_dataloader_calibrator.py @@ -61,8 +61,8 @@ def test_compile_script(self): log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) compile_spec = { - "input_shapes": [[1, 3, 32, 32]], - "op_precision": torch.int8, + "inputs": [trtorch.Input([1, 3, 32, 32])], + "enabled_precisions": {torch.float, torch.int8}, "calibrator": self.calibrator, "device": { "device_type": trtorch.DeviceType.GPU, diff --git a/tests/py/test_ptq_to_backend.py b/tests/py/test_ptq_to_backend.py index ae665dda71..3c16ec14da 100644 --- a/tests/py/test_ptq_to_backend.py +++ b/tests/py/test_ptq_to_backend.py @@ -35,8 +35,8 @@ def setUp(self): self.spec = { "forward": trtorch.TensorRTCompileSpec({ - "input_shapes": [[1, 3, 32, 32]], - "op_precision": torch.int8, + "inputs": [trtorch.Input([1, 3, 32, 32])], + "enabled_precisions": {torch.float, torch.int8}, "calibrator": self.calibrator, "device": { "device_type": trtorch.DeviceType.GPU, diff --git 
a/tests/py/test_ptq_trt_calibrator.py b/tests/py/test_ptq_trt_calibrator.py index 737ecee4be..c434a3f729 100644 --- a/tests/py/test_ptq_trt_calibrator.py +++ b/tests/py/test_ptq_trt_calibrator.py @@ -103,8 +103,8 @@ def test_compile_script(self): log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) compile_spec = { - "input_shapes": [[1, 3, 32, 32]], - "op_precision": torch.int8, + "inputs": [trtorch.Input([1, 3, 32, 32])], + "enabled_precisions": {torch.float, torch.int8}, "calibrator": self.calibrator, "device": { "device_type": trtorch.DeviceType.GPU, diff --git a/tests/py/test_to_backend_api.py b/tests/py/test_to_backend_api.py index 77ada08931..5be09d9517 100644 --- a/tests/py/test_to_backend_api.py +++ b/tests/py/test_to_backend_api.py @@ -14,8 +14,8 @@ def setUp(self): self.spec = { "forward": trtorch.TensorRTCompileSpec({ - "input_shapes": [[1, 3, 300, 300]], - "op_precision": torch.float, + "inputs": [trtorch.Input([1, 3, 300, 300])], + "enabled_precisions": {torch.float}, "refit": False, "debug": False, "strict_types": False, diff --git a/tests/py/test_trt_intercompatability.py b/tests/py/test_trt_intercompatability.py index ffc4cb7217..6aeba27fe0 100644 --- a/tests/py/test_trt_intercompatability.py +++ b/tests/py/test_trt_intercompatability.py @@ -15,7 +15,7 @@ def setUp(self): def test_pt_to_trt(self): compile_spec = { - "input_shapes": [self.input.shape], + "inputs": [trtorch.Input(self.input.shape)], "device": { "device_type": trtorch.DeviceType.GPU, "gpu_id": 0,