From ba696521eed5d8b49a3c6f21ef243ca4b3474d64 Mon Sep 17 00:00:00 2001 From: Asfiya Baig Date: Thu, 25 Apr 2024 16:01:48 -0700 Subject: [PATCH] TensorRT 10.0 GA Release Signed-off-by: Asfiya Baig --- CHANGELOG.md | 19 +- CMakeLists.txt | 32 +- README.md | 54 +- VERSION | 2 +- .../modules/find_library_create_target.cmake | 7 +- cmake/modules/set_ifndef.cmake | 2 +- .../cmake_aarch64-android.toolchain | 2 +- .../toolchains/cmake_aarch64-native.toolchain | 2 +- cmake/toolchains/cmake_aarch64.toolchain | 14 +- .../toolchains/cmake_aarch64_cross.toolchain | 2 +- cmake/toolchains/cmake_ppc64le.toolchain | 2 +- cmake/toolchains/cmake_qnx.toolchain | 2 +- cmake/toolchains/cmake_x64_win.toolchain | 2 +- cmake/toolchains/cmake_x86_64.toolchain | 2 +- .../cmake_x86_64_agnostic.toolchain | 2 +- demo/BERT/CMakeLists.txt | 2 +- demo/BERT/README.md | 6 +- demo/BERT/builder.py | 4 +- demo/BERT/builder_utils.py | 2 +- demo/BERT/builder_varseqlen.py | 4 +- demo/BERT/helpers/calibrator.py | 2 +- demo/BERT/helpers/data_processing.py | 2 +- demo/BERT/helpers/tokenization.py | 2 +- demo/BERT/infer_c/bert_infer.h | 18 +- demo/BERT/infer_c/common.h | 4 +- demo/BERT/infer_c/infer_c.cpp | 2 +- demo/BERT/infer_c/logging.cpp | 2 +- demo/BERT/infer_c/logging.h | 2 +- demo/BERT/infer_c/perf.cpp | 2 +- demo/BERT/inference.py | 4 +- demo/BERT/inference_c.py | 2 +- demo/BERT/inference_varseqlen.py | 4 +- demo/BERT/perf.py | 2 +- demo/BERT/perf_varseqlen.py | 2 +- demo/BERT/squad/evaluate-v1.1.py | 2 +- demo/BERT/squad/evaluate-v2.0.py | 2 +- demo/DeBERTa/deberta_onnx_modify.py | 2 +- demo/DeBERTa/deberta_ort_inference.py | 2 +- demo/DeBERTa/deberta_pytorch2onnx.py | 2 +- demo/DeBERTa/deberta_tensorrt_inference.py | 2 +- demo/DeBERTa/requirements.txt | 2 +- demo/Diffusion/README.md | 31 +- demo/Diffusion/calibration.py | 177 --- demo/Diffusion/demo_img2img.py | 2 +- demo/Diffusion/demo_inpaint.py | 2 +- demo/Diffusion/demo_txt2img.py | 2 +- demo/Diffusion/demo_txt2img_xl.py | 2 +- 
demo/Diffusion/models.py | 27 +- demo/Diffusion/requirements.txt | 10 +- demo/Diffusion/stable_diffusion_pipeline.py | 105 +- demo/Diffusion/utilities.py | 64 +- demo/Diffusion/utils_ammo.py | 160 ++ demo/Jasper/README.md | 3 - demo/Tacotron2/README.md | 113 -- demo/Tacotron2/common/audio_processing.py | 110 -- demo/Tacotron2/common/layers.py | 96 -- demo/Tacotron2/common/stft.py | 159 -- demo/Tacotron2/common/utils.py | 72 - demo/Tacotron2/config.json | 11 - demo/Tacotron2/data_functions.py | 58 - demo/Tacotron2/inference.py | 266 ---- demo/Tacotron2/inference_perf.py | 117 -- demo/Tacotron2/main.py | 43 - demo/Tacotron2/models.py | 137 -- demo/Tacotron2/multiproc.py | 75 - demo/Tacotron2/phrases/phrase.txt | 1 - demo/Tacotron2/phrases/phrase_1_128.txt | 1 - demo/Tacotron2/phrases/phrase_1_256.txt | 2 - demo/Tacotron2/phrases/phrase_1_64.txt | 1 - demo/Tacotron2/phrases/phrase_4_256.txt | 4 - demo/Tacotron2/phrases/phrase_4_64.txt | 4 - demo/Tacotron2/phrases/phrase_8_256.txt | 8 - demo/Tacotron2/phrases/phrase_8_64.txt | 8 - demo/Tacotron2/preprocess_audio2mel.py | 81 -- demo/Tacotron2/requirements.txt | 12 - demo/Tacotron2/run_latency_tests.sh | 27 - .../Tacotron2/scripts/download_checkpoints.sh | 31 - demo/Tacotron2/scripts/inference_benchmark.sh | 21 - .../scripts/install_prerequisites.sh | 25 - demo/Tacotron2/scripts/prepare_dataset.sh | 31 - demo/Tacotron2/scripts/prepare_mels.sh | 36 - demo/Tacotron2/tacotron2/arg_parser.py | 98 -- demo/Tacotron2/tacotron2/data_function.py | 145 -- demo/Tacotron2/tacotron2/loss_function.py | 36 - demo/Tacotron2/tacotron2/model.py | 681 --------- demo/Tacotron2/tacotron2/text/LICENCE | 19 - demo/Tacotron2/tacotron2/text/__init__.py | 74 - demo/Tacotron2/tacotron2/text/cleaners.py | 106 -- demo/Tacotron2/tacotron2/text/cmudict.py | 81 -- demo/Tacotron2/tacotron2/text/numbers.py | 87 -- demo/Tacotron2/tacotron2/text/symbols.py | 34 - demo/Tacotron2/tensorrt/convert_onnx2trt.py | 168 --- .../tensorrt/convert_tacotron22onnx.py | 
418 ------ .../tensorrt/convert_waveglow2onnx.py | 167 --- demo/Tacotron2/tensorrt/generate_decoder.py | 212 --- demo/Tacotron2/tensorrt/inference_trt.py | 491 ------- .../tensorrt/run_latency_tests_trt.sh | 17 - demo/Tacotron2/tensorrt/test_infer_trt.py | 230 --- demo/Tacotron2/tensorrt/trt_utils.py | 154 -- demo/Tacotron2/test_infer.py | 198 --- demo/Tacotron2/test_infer.sh | 126 -- demo/Tacotron2/train.py | 535 ------- demo/Tacotron2/waveglow/arg_parser.py | 55 - demo/Tacotron2/waveglow/data_function.py | 78 - demo/Tacotron2/waveglow/denoiser.py | 53 - demo/Tacotron2/waveglow/loss_function.py | 38 - demo/Tacotron2/waveglow/model.py | 343 ----- .../HuggingFace-Diffusers/README.md | 36 - .../TensorRT-diffusers-txt2img.ipynb | 1290 ----------------- docker/build.sh | 2 +- docker/launch.sh | 2 +- docker/rockylinux8.Dockerfile | 105 ++ docker/rockylinux9.Dockerfile | 104 ++ docker/ubuntu-20.04.Dockerfile | 20 +- docker/ubuntu-22.04-aarch64.Dockerfile | 112 ++ docker/ubuntu-22.04.Dockerfile | 22 +- docker/ubuntu-cross-aarch64.Dockerfile | 134 ++ include/NvInfer.h | 65 +- include/NvInferConsistency.h | 2 + include/NvInferLegacyDims.h | 4 +- include/NvInferRuntimeBase.h | 22 +- include/NvInferRuntimeCommon.h | 2 + include/NvInferRuntimePlugin.h | 2 + include/NvInferSafeRuntime.h | 2 + include/NvInferVersion.h | 4 +- parsers/CMakeLists.txt | 2 +- parsers/common/half.h | 2 +- parsers/common/ieee_half.h | 2 +- parsers/common/parserUtils.h | 2 +- parsers/onnx | 2 +- plugin/CMakeLists.txt | 22 +- plugin/batchTilePlugin/CMakeLists.txt | 2 +- plugin/batchTilePlugin/batchTilePlugin.cpp | 2 +- plugin/batchTilePlugin/batchTilePlugin.h | 2 +- plugin/batchedNMSPlugin/CMakeLists.txt | 2 +- .../batchedNMSPlugin/batchedNMSInference.cu | 2 +- plugin/batchedNMSPlugin/batchedNMSPlugin.cpp | 2 +- plugin/batchedNMSPlugin/batchedNMSPlugin.h | 2 +- plugin/batchedNMSPlugin/gatherNMSOutputs.h | 2 +- plugin/bertQKVToContextPlugin/CMakeLists.txt | 2 +- 
.../fused_multihead_attention/CMakeLists.txt | 2 +- .../include/fused_multihead_attention.h | 463 +++--- .../fused_multihead_attention_common.h | 2 +- ...head_attention_fp16_128_64_kernel.sm75.cpp | 2 +- ...head_attention_fp16_128_64_kernel.sm80.cpp | 2 +- ...head_attention_fp16_128_64_kernel.sm87.cpp | 2 +- ...head_attention_fp16_128_64_kernel.sm90.cpp | 2 +- ...head_attention_fp16_384_64_kernel.sm75.cpp | 2 +- ...head_attention_fp16_384_64_kernel.sm80.cpp | 2 +- ...head_attention_fp16_384_64_kernel.sm86.cpp | 2 +- ...head_attention_fp16_384_64_kernel.sm87.cpp | 2 +- ...head_attention_fp16_384_64_kernel.sm90.cpp | 2 +- ...head_attention_fp16_512_64_kernel.sm90.cpp | 2 +- ...ihead_attention_fp16_64_64_kernel.sm75.cpp | 2 +- ...ihead_attention_fp16_64_64_kernel.sm80.cpp | 2 +- ...ihead_attention_fp16_64_64_kernel.sm87.cpp | 2 +- ...ihead_attention_fp16_64_64_kernel.sm90.cpp | 2 +- ...ihead_attention_fp16_96_64_kernel.sm75.cpp | 2 +- ...ihead_attention_fp16_96_64_kernel.sm80.cpp | 2 +- ...ihead_attention_fp16_96_64_kernel.sm87.cpp | 2 +- ...ihead_attention_fp16_96_64_kernel.sm90.cpp | 2 +- ...head_attention_int8_128_64_kernel.sm75.cpp | 2 +- ...head_attention_int8_128_64_kernel.sm80.cpp | 2 +- ...head_attention_int8_128_64_kernel.sm87.cpp | 2 +- ...head_attention_int8_128_64_kernel.sm90.cpp | 2 +- ...head_attention_int8_384_64_kernel.sm75.cpp | 2 +- ...head_attention_int8_384_64_kernel.sm80.cpp | 2 +- ...head_attention_int8_384_64_kernel.sm87.cpp | 2 +- ...head_attention_int8_384_64_kernel.sm90.cpp | 2 +- ...head_attention_int8_512_64_kernel.sm90.cpp | 2 +- ...ihead_attention_int8_64_64_kernel.sm80.cpp | 2 +- ...ihead_attention_int8_96_64_kernel.sm80.cpp | 2 +- .../CMakeLists.txt | 2 +- .../include/fused_multihead_attention_v2.h | 10 +- ...d_attention_v2_fp16_128_32_kernel.sm75.cpp | 2 +- ...d_attention_v2_fp16_128_32_kernel.sm80.cpp | 2 +- ...d_attention_v2_fp16_128_64_kernel.sm75.cpp | 2 +- ...d_attention_v2_fp16_128_64_kernel.sm80.cpp | 2 +- 
...d_attention_v2_fp16_128_64_kernel.sm86.cpp | 2 +- ...d_attention_v2_fp16_128_64_kernel.sm87.cpp | 2 +- ...d_attention_v2_fp16_128_64_kernel.sm90.cpp | 2 +- ...d_attention_v2_fp16_256_32_kernel.sm75.cpp | 2 +- ...d_attention_v2_fp16_256_32_kernel.sm80.cpp | 2 +- ...d_attention_v2_fp16_256_64_kernel.sm75.cpp | 2 +- ...d_attention_v2_fp16_256_64_kernel.sm80.cpp | 2 +- ...d_attention_v2_fp16_256_64_kernel.sm86.cpp | 2 +- ...d_attention_v2_fp16_256_64_kernel.sm87.cpp | 2 +- ...d_attention_v2_fp16_256_64_kernel.sm90.cpp | 2 +- ...d_attention_v2_fp16_384_64_kernel.sm75.cpp | 2 +- ...d_attention_v2_fp16_384_64_kernel.sm80.cpp | 2 +- ...d_attention_v2_fp16_384_64_kernel.sm86.cpp | 2 +- ...d_attention_v2_fp16_384_64_kernel.sm87.cpp | 2 +- ...d_attention_v2_fp16_384_64_kernel.sm90.cpp | 2 +- ...d_attention_v2_fp16_512_32_kernel.sm75.cpp | 2 +- ...d_attention_v2_fp16_512_32_kernel.sm80.cpp | 2 +- ...d_attention_v2_fp16_512_64_kernel.sm75.cpp | 2 +- ...d_attention_v2_fp16_512_64_kernel.sm80.cpp | 2 +- ...d_attention_v2_fp16_512_64_kernel.sm90.cpp | 2 +- ...ad_attention_v2_fp16_64_64_kernel.sm75.cpp | 2 +- ...ad_attention_v2_fp16_64_64_kernel.sm80.cpp | 2 +- ...ad_attention_v2_fp16_64_64_kernel.sm86.cpp | 2 +- ...ad_attention_v2_fp16_64_64_kernel.sm87.cpp | 2 +- ...ad_attention_v2_fp16_64_64_kernel.sm90.cpp | 2 +- ...ad_attention_v2_fp16_96_64_kernel.sm75.cpp | 2 +- ...ad_attention_v2_fp16_96_64_kernel.sm80.cpp | 2 +- ...ad_attention_v2_fp16_96_64_kernel.sm86.cpp | 2 +- ...ad_attention_v2_fp16_96_64_kernel.sm87.cpp | 2 +- ...ad_attention_v2_fp16_96_64_kernel.sm90.cpp | 2 +- ...ttention_v2_il_int8_128_32_kernel.sm80.cpp | 2 +- ...ttention_v2_il_int8_128_64_kernel.sm87.cpp | 2 +- ...ttention_v2_il_int8_128_64_kernel.sm90.cpp | 2 +- ...ttention_v2_il_int8_192_64_kernel.sm87.cpp | 2 +- ...ttention_v2_il_int8_192_64_kernel.sm90.cpp | 2 +- ...ttention_v2_il_int8_256_64_kernel.sm87.cpp | 2 +- ...ttention_v2_il_int8_256_64_kernel.sm90.cpp | 2 +- 
...ttention_v2_il_int8_384_64_kernel.sm87.cpp | 2 +- ...ttention_v2_il_int8_384_64_kernel.sm90.cpp | 2 +- ...attention_v2_il_int8_64_64_kernel.sm80.cpp | 2 +- ...attention_v2_il_int8_64_64_kernel.sm87.cpp | 2 +- ...attention_v2_il_int8_64_64_kernel.sm90.cpp | 2 +- ...attention_v2_il_int8_96_64_kernel.sm80.cpp | 2 +- ...attention_v2_il_int8_96_64_kernel.sm87.cpp | 2 +- ...attention_v2_il_int8_96_64_kernel.sm90.cpp | 2 +- ...d_attention_v2_int8_128_32_kernel.sm75.cpp | 2 +- ...d_attention_v2_int8_128_32_kernel.sm80.cpp | 2 +- ...d_attention_v2_int8_128_64_kernel.sm72.cpp | 2 +- ...d_attention_v2_int8_128_64_kernel.sm75.cpp | 2 +- ...d_attention_v2_int8_128_64_kernel.sm80.cpp | 2 +- ...d_attention_v2_int8_128_64_kernel.sm86.cpp | 2 +- ...d_attention_v2_int8_128_64_kernel.sm87.cpp | 2 +- ...d_attention_v2_int8_128_64_kernel.sm90.cpp | 2 +- ...d_attention_v2_int8_192_64_kernel.sm72.cpp | 2 +- ...d_attention_v2_int8_192_64_kernel.sm75.cpp | 2 +- ...d_attention_v2_int8_192_64_kernel.sm80.cpp | 2 +- ...d_attention_v2_int8_192_64_kernel.sm86.cpp | 2 +- ...d_attention_v2_int8_192_64_kernel.sm87.cpp | 2 +- ...d_attention_v2_int8_192_64_kernel.sm90.cpp | 2 +- ...d_attention_v2_int8_256_32_kernel.sm75.cpp | 2 +- ...d_attention_v2_int8_256_32_kernel.sm80.cpp | 2 +- ...d_attention_v2_int8_256_64_kernel.sm72.cpp | 2 +- ...d_attention_v2_int8_256_64_kernel.sm75.cpp | 2 +- ...d_attention_v2_int8_256_64_kernel.sm80.cpp | 2 +- ...d_attention_v2_int8_256_64_kernel.sm86.cpp | 2 +- ...d_attention_v2_int8_256_64_kernel.sm87.cpp | 2 +- ...d_attention_v2_int8_256_64_kernel.sm90.cpp | 2 +- ...d_attention_v2_int8_384_64_kernel.sm72.cpp | 2 +- ...d_attention_v2_int8_384_64_kernel.sm75.cpp | 2 +- ...d_attention_v2_int8_384_64_kernel.sm80.cpp | 2 +- ...d_attention_v2_int8_384_64_kernel.sm86.cpp | 2 +- ...d_attention_v2_int8_384_64_kernel.sm87.cpp | 2 +- ...d_attention_v2_int8_384_64_kernel.sm90.cpp | 2 +- ...d_attention_v2_int8_512_32_kernel.sm75.cpp | 2 +- 
...d_attention_v2_int8_512_32_kernel.sm80.cpp | 2 +- ...d_attention_v2_int8_512_64_kernel.sm75.cpp | 2 +- ...d_attention_v2_int8_512_64_kernel.sm80.cpp | 2 +- ...d_attention_v2_int8_512_64_kernel.sm90.cpp | 2 +- ...ad_attention_v2_int8_64_64_kernel.sm80.cpp | 2 +- ...ad_attention_v2_int8_64_64_kernel.sm87.cpp | 2 +- ...ad_attention_v2_int8_64_64_kernel.sm90.cpp | 2 +- ...ad_attention_v2_int8_96_64_kernel.sm80.cpp | 2 +- ...ad_attention_v2_int8_96_64_kernel.sm87.cpp | 2 +- ...ad_attention_v2_int8_96_64_kernel.sm90.cpp | 2 +- .../qkvToContextInt8InterleavedPlugin.cpp | 2 +- .../bertQKVToContextPlugin/zeroPadding2d.cu | 2 +- plugin/bertQKVToContextPlugin/zeroPadding2d.h | 2 +- plugin/clipPlugin/CMakeLists.txt | 2 +- plugin/clipPlugin/clip.cu | 2 +- plugin/clipPlugin/clip.h | 2 +- plugin/common/CMakeLists.txt | 2 +- plugin/common/bboxUtils.h | 2 +- plugin/common/bertCommon.h | 24 +- plugin/common/cub_helper.h | 2 +- plugin/common/cudaDriverWrapper.cpp | 2 +- plugin/common/cudaDriverWrapper.h | 2 +- plugin/common/dimsHelpers.h | 2 +- plugin/common/half.h | 2 +- plugin/common/kernels/CMakeLists.txt | 2 +- plugin/common/kernels/bboxDeltas2Proposals.cu | 2 +- plugin/common/kernels/cropAndResizeKernel.cu | 2 +- plugin/common/kernels/decodeBbox3DKernels.cu | 2 +- plugin/common/kernels/detectionForward.cu | 2 +- plugin/common/kernels/extractFgScores.cu | 2 +- plugin/common/kernels/generateAnchors.cu | 2 +- plugin/common/kernels/gridAnchorLayer.cu | 2 +- plugin/common/kernels/kernel.cpp | 2 +- plugin/common/kernels/lReLU.cu | 2 +- plugin/common/kernels/maskRCNNKernels.cu | 2 +- plugin/common/kernels/maskRCNNKernels.h | 2 +- plugin/common/kernels/nmsLayer.cu | 2 +- plugin/common/kernels/permuteData.cu | 2 +- plugin/common/kernels/pillarScatterKernels.cu | 2 +- plugin/common/kernels/priorBoxLayer.cu | 2 +- plugin/common/kernels/proposalKernel.cu | 2 +- plugin/common/kernels/proposalsForward.cu | 2 +- plugin/common/kernels/reducedMathPlugin.h | 2 +- 
plugin/common/kernels/regionForward.cu | 2 +- plugin/common/kernels/reorgForward.cu | 2 +- plugin/common/kernels/roiPooling.cu | 2 +- plugin/common/kernels/rproiInferenceFused.cu | 2 +- plugin/common/kernels/sortScoresPerClass.cu | 2 +- plugin/common/kernels/sortScoresPerImage.cu | 2 +- .../common/kernels/voxelGeneratorKernels.cu | 2 +- plugin/common/mrcnn_config.h | 2 +- plugin/common/nmsUtils.h | 2 +- plugin/common/reducedMathPlugin.cpp | 2 +- plugin/common/serialize.hpp | 2 +- plugin/common/templates.h | 2 +- plugin/common/vfcCommon.cpp | 2 +- plugin/common/vfcCommon.h | 2 +- plugin/coordConvACPlugin/CMakeLists.txt | 2 +- .../coordConvACPlugin/coordConvACPlugin.cpp | 2 +- plugin/coordConvACPlugin/coordConvACPlugin.h | 2 +- .../coordConvACPluginKernels.cu | 2 +- plugin/cropAndResizePlugin/CMakeLists.txt | 2 +- .../cropAndResizePlugin.cpp | 2 +- .../cropAndResizePlugin/cropAndResizePlugin.h | 2 +- plugin/decodeBbox3DPlugin/CMakeLists.txt | 2 +- plugin/decodeBbox3DPlugin/decodeBbox3D.cpp | 2 +- plugin/decodeBbox3DPlugin/decodeBbox3D.h | 2 +- plugin/detectionLayerPlugin/CMakeLists.txt | 2 +- .../detectionLayerPlugin.cpp | 2 +- .../detectionLayerPlugin.h | 2 +- .../CMakeLists.txt | 2 +- .../disentangledAttentionPlugin.cpp | 2 +- .../disentangledAttentionPlugin.h | 2 +- .../disentangledKernel.cu | 2 +- plugin/efficientNMSPlugin/CMakeLists.txt | 2 +- .../efficientNMSInference.cu | 2 +- .../efficientNMSInference.cuh | 2 +- .../efficientNMSInference.h | 2 +- .../efficientNMSParameters.h | 2 +- .../efficientNMSPlugin/efficientNMSPlugin.cpp | 2 +- .../efficientNMSPlugin/efficientNMSPlugin.h | 2 +- .../efficientNMSPlugin/tftrt/CMakeLists.txt | 2 +- .../tftrt/efficientNMSExplicitTFTRTPlugin.cpp | 2 +- .../tftrt/efficientNMSExplicitTFTRTPlugin.h | 2 +- .../tftrt/efficientNMSImplicitTFTRTPlugin.cpp | 2 +- .../tftrt/efficientNMSImplicitTFTRTPlugin.h | 2 +- plugin/embLayerNormPlugin/CMakeLists.txt | 2 +- .../embLayerNormPlugin/embLayerNormKernel.cu | 2 +- 
.../embLayerNormPlugin/embLayerNormPlugin.cpp | 2 +- .../embLayerNormPlugin/embLayerNormPlugin.h | 2 +- .../embLayerNormVarSeqlenKernelHFace.cu | 2 +- .../embLayerNormVarSeqlenKernelMTron.cu | 2 +- .../embLayerNormVarSeqlenPlugin.cpp | 2 +- .../embLayerNormVarSeqlenPlugin.h | 2 +- plugin/exports-vfc_plugin.def | 4 +- plugin/exports-vfc_plugin.map | 2 +- plugin/exports.def | 4 +- plugin/exports.map | 2 +- plugin/fcPlugin/CMakeLists.txt | 2 +- plugin/fcPlugin/fcPlugin.cpp | 19 +- plugin/fcPlugin/fcPlugin.h | 121 +- plugin/flattenConcat/CMakeLists.txt | 2 +- plugin/geluPlugin/CMakeLists.txt | 2 +- plugin/geluPlugin/geluKernel.cu | 2 +- plugin/geluPlugin/geluPlugin.cpp | 2 +- plugin/geluPlugin/geluPlugin.h | 2 +- plugin/generateDetectionPlugin/CMakeLists.txt | 2 +- .../generateDetectionPlugin.cpp | 2 +- .../generateDetectionPlugin.h | 2 +- plugin/gridAnchorPlugin/CMakeLists.txt | 2 +- .../groupNormalizationPlugin/CMakeLists.txt | 2 +- .../groupNormalizationKernel.cu | 2 +- .../CMakeLists.txt | 2 +- .../instanceNormCommon.h | 2 +- .../instanceNormFwd.h | 2 +- .../instanceNormFwdImpl.cu | 2 +- plugin/leakyReluPlugin/CMakeLists.txt | 2 +- plugin/leakyReluPlugin/lReluPlugin.cpp | 2 +- plugin/leakyReluPlugin/lReluPlugin.h | 2 +- .../modulatedDeformConvPlugin/CMakeLists.txt | 2 +- .../commonCudaHelper.h | 2 +- .../CMakeLists.txt | 2 +- .../multilevelCropAndResizePlugin.cpp | 2 +- .../multilevelCropAndResizePlugin.h | 2 +- plugin/multilevelProposeROI/CMakeLists.txt | 2 +- .../multilevelProposeROIPlugin.cpp | 2 +- .../multilevelProposeROIPlugin.h | 2 +- .../multilevelProposeROI/tlt_mrcnn_config.h | 2 +- .../CMakeLists.txt | 2 +- .../multiscaleDeformableAttn.cu | 2 +- .../multiscaleDeformableAttn.h | 2 +- .../multiscaleDeformableAttnPlugin.cpp | 2 +- .../multiscaleDeformableIm2ColCuda.cuh | 2 +- plugin/nmsPlugin/CMakeLists.txt | 2 +- plugin/nmsPlugin/nmsPlugin.cpp | 2 +- plugin/nmsPlugin/nmsPlugin.h | 2 +- plugin/normalizePlugin/CMakeLists.txt | 2 +- 
plugin/nvFasterRCNN/CMakeLists.txt | 2 +- plugin/pillarScatterPlugin/CMakeLists.txt | 2 +- plugin/pillarScatterPlugin/pillarScatter.cpp | 2 +- plugin/pillarScatterPlugin/pillarScatter.h | 2 +- plugin/priorBoxPlugin/CMakeLists.txt | 2 +- plugin/proposalLayerPlugin/CMakeLists.txt | 2 +- .../proposalLayerPlugin.cpp | 2 +- .../proposalLayerPlugin/proposalLayerPlugin.h | 2 +- plugin/proposalPlugin/CMakeLists.txt | 2 +- plugin/proposalPlugin/proposalPlugin.cpp | 6 +- plugin/proposalPlugin/proposalPlugin.h | 2 +- plugin/pyramidROIAlignPlugin/CMakeLists.txt | 2 +- .../pyramidROIAlignPlugin.cpp | 2 +- .../pyramidROIAlignPlugin.h | 2 +- plugin/regionPlugin/CMakeLists.txt | 2 +- plugin/regionPlugin/regionPlugin.cpp | 2 +- plugin/regionPlugin/regionPlugin.h | 2 +- plugin/reorgPlugin/CMakeLists.txt | 2 +- plugin/reorgPlugin/reorgPlugin.cpp | 2 +- plugin/reorgPlugin/reorgPlugin.h | 2 +- plugin/resizeNearestPlugin/CMakeLists.txt | 2 +- .../resizeNearestPlugin.cpp | 2 +- .../resizeNearestPlugin/resizeNearestPlugin.h | 2 +- plugin/roiAlignPlugin/CMakeLists.txt | 2 +- plugin/roiAlignPlugin/roiAlignKernel.h | 2 +- plugin/roiAlignPlugin/roiAlignPlugin.cpp | 2 +- plugin/roiAlignPlugin/roiAlignPlugin.h | 2 +- plugin/scatterElementsPlugin/CMakeLists.txt | 2 +- plugin/scatterElementsPlugin/TensorInfo.cuh | 2 +- plugin/scatterElementsPlugin/atomics.cuh | 2 +- plugin/scatterElementsPlugin/reducer.cuh | 2 +- .../scatterElementsPlugin.cpp | 2 +- .../scatterElementsPlugin.h | 2 +- .../scatterElementsPluginKernel.cu | 2 +- .../scatterElementsPluginKernel.h | 2 +- plugin/scatterPlugin/CMakeLists.txt | 2 +- plugin/scatterPlugin/scatterLayer.cu | 2 +- plugin/skipLayerNormPlugin/CMakeLists.txt | 2 +- ...skipLayerNormInt8InterleavedKernelHFace.cu | 2 +- ...skipLayerNormInt8InterleavedKernelMTron.cu | 2 +- .../skipLayerNormInt8InterleavedPlugin.cpp | 2 +- .../skipLayerNormInt8InterleavedPlugin.h | 2 +- .../skipLayerNormKernel.cu | 2 +- .../skipLayerNormPlugin.cpp | 2 +- 
.../skipLayerNormPlugin/skipLayerNormPlugin.h | 2 +- plugin/specialSlicePlugin/CMakeLists.txt | 2 +- .../specialSlicePlugin/specialSlicePlugin.cpp | 2 +- .../specialSlicePlugin/specialSlicePlugin.h | 2 +- plugin/splitPlugin/CMakeLists.txt | 2 +- plugin/splitPlugin/split.cu | 2 +- plugin/splitPlugin/split.h | 2 +- plugin/voxelGeneratorPlugin/CMakeLists.txt | 2 +- .../voxelGeneratorPlugin/voxelGenerator.cpp | 2 +- plugin/voxelGeneratorPlugin/voxelGenerator.h | 2 +- python/CMakeLists.txt | 34 +- .../docstrings/infer/pyAlgorithmSelectorDoc.h | 10 +- python/docstrings/infer/pyCoreDoc.h | 31 +- .../docstrings/infer/pyFoundationalTypesDoc.h | 8 +- python/docstrings/infer/pyGraphDoc.h | 9 +- python/docstrings/infer/pyInt8Doc.h | 2 +- python/docstrings/infer/pyPluginDoc.h | 96 +- python/docstrings/parsers/pyOnnxDoc.h | 2 +- python/docstrings/pyTensorRTDoc.h | 2 +- python/include/ForwardDeclarations.h | 2 +- python/include/utils.h | 4 +- python/packaging/bindings_wheel/setup.py | 2 +- .../bindings_wheel/tensorrt/__init__.py | 12 +- python/packaging/frontend_sdist/setup.py | 12 +- .../frontend_sdist/tensorrt/__init__.py | 2 +- python/packaging/libs_wheel/setup.py | 2 +- .../libs_wheel/tensorrt_libs/__init__.py | 9 +- python/packaging/metapackage/setup.py | 2 +- python/src/infer/pyAlgorithmSelector.cpp | 12 +- python/src/infer/pyCore.cpp | 53 +- python/src/infer/pyFoundationalTypes.cpp | 14 +- python/src/infer/pyGraph.cpp | 2 +- python/src/infer/pyInt8.cpp | 36 +- python/src/infer/pyPlugin.cpp | 291 ++-- python/src/parsers/pyOnnx.cpp | 2 +- python/src/pyTensorRT.cpp | 2 +- python/src/utils.cpp | 4 +- .../Additional Examples/helper.py | 2 +- quickstart/IntroNotebooks/helper.py | 2 +- quickstart/IntroNotebooks/onnx_helper.py | 2 +- quickstart/Makefile | 2 +- quickstart/Makefile.config | 2 +- quickstart/SemanticSegmentation/Makefile | 2 +- quickstart/SemanticSegmentation/export.py | 2 +- .../SemanticSegmentation/tutorial-runtime.cpp | 2 +- quickstart/common/logger.cpp | 2 +- 
quickstart/common/logger.h | 2 +- quickstart/common/logging.h | 2 +- quickstart/common/util.cpp | 2 +- quickstart/common/util.h | 2 +- quickstart/deploy_to_triton/config.pbtxt | 2 +- .../deploy_to_triton/export_resnet_to_onnx.py | 2 +- quickstart/deploy_to_triton/triton_client.py | 2 +- samples/CMakeLists.txt | 2 +- samples/CMakeSamplesTemplate.txt | 16 +- samples/common/BatchStream.h | 2 +- samples/common/EntropyCalibrator.h | 2 +- samples/common/ErrorRecorder.h | 4 +- samples/common/argsParser.h | 8 +- samples/common/bfloat16.cpp | 2 +- samples/common/bfloat16.h | 2 +- samples/common/buffers.h | 2 +- samples/common/common.h | 2 +- samples/common/dumpTFWts.py | 2 +- samples/common/getOptions.cpp | 2 +- samples/common/getOptions.h | 2 +- samples/common/getoptWin.h | 2 +- samples/common/half.h | 2 +- samples/common/logger.cpp | 2 +- samples/common/logger.h | 2 +- samples/common/logging.h | 4 +- samples/common/parserOnnxConfig.h | 2 +- samples/common/safeCommon.h | 4 +- samples/common/sampleConfig.h | 2 +- samples/common/sampleDevice.cpp | 2 +- samples/common/sampleDevice.h | 2 +- samples/common/sampleEngines.cpp | 12 +- samples/common/sampleEngines.h | 2 +- samples/common/sampleEntrypoints.h | 2 +- samples/common/sampleInference.cpp | 19 +- samples/common/sampleInference.h | 2 +- samples/common/sampleOptions.cpp | 2 +- samples/common/sampleOptions.h | 2 +- samples/common/sampleReporting.cpp | 2 +- samples/common/sampleReporting.h | 2 +- samples/common/sampleUtils.cpp | 2 +- samples/common/sampleUtils.h | 2 +- samples/common/streamReader.h | 2 +- samples/python/common.py | 31 +- samples/python/detectron2/build_engine.py | 133 +- samples/python/detectron2/create_onnx.py | 557 +++++-- samples/python/detectron2/eval_coco.py | 72 +- samples/python/detectron2/image_batcher.py | 36 +- samples/python/detectron2/infer.py | 194 ++- samples/python/detectron2/onnx_utils.py | 105 +- samples/python/detectron2/visualize.py | 231 ++- samples/python/downloader.py | 35 +- 
samples/python/efficientdet/build_engine.py | 193 ++- samples/python/efficientdet/compare_tf.py | 128 +- samples/python/efficientdet/create_onnx.py | 304 +++- samples/python/efficientdet/eval_coco.py | 52 +- samples/python/efficientdet/image_batcher.py | 27 +- samples/python/efficientdet/infer.py | 99 +- samples/python/efficientdet/infer_tf.py | 113 +- samples/python/efficientdet/onnx_utils.py | 30 +- samples/python/efficientdet/visualize.py | 22 +- samples/python/efficientnet/build_engine.py | 50 +- samples/python/efficientnet/compare_tf.py | 45 +- samples/python/efficientnet/create_onnx.py | 38 +- samples/python/efficientnet/eval_gt.py | 24 +- samples/python/efficientnet/image_batcher.py | 22 +- samples/python/efficientnet/infer.py | 20 +- .../build_and_refit_engine.py | 205 ++- .../data_processing.py | 6 +- .../engine_refit_onnx_bidaf/prepare_model.py | 6 +- .../onnx_resnet50.py | 12 +- .../python/network_api_pytorch_mnist/model.py | 21 +- .../network_api_pytorch_mnist/sample.py | 41 +- .../python/onnx_custom_plugin/CMakeLists.txt | 2 +- .../onnx_custom_plugin/load_plugin_lib.py | 11 +- samples/python/onnx_custom_plugin/model.py | 31 +- samples/python/onnx_custom_plugin/sample.py | 51 +- .../test_custom_hardmax_plugin.py | 32 +- .../python/onnx_packnet/convert_to_onnx.py | 22 +- .../python/onnx_packnet/post_processing.py | 19 +- samples/python/python_plugin/CMakeLists.txt | 2 +- .../python_plugin/circ_pad_plugin_cpp.py | 29 +- .../circ_pad_plugin_cuda_python.py | 112 +- .../python_plugin/circ_pad_plugin_cupy.py | 61 +- .../circ_pad_plugin_inetdef_cuda_python.py | 126 +- .../python_plugin/circ_pad_plugin_numba.py | 17 +- .../python_plugin/circ_pad_plugin_torch.py | 18 +- .../python_plugin/circ_pad_plugin_triton.py | 68 +- .../circ_plugin_cpp/circ_pad_plugin.cu | 5 +- samples/python/python_plugin/utils.py | 75 +- samples/python/scripts/download_mnist_data.sh | 2 +- samples/python/scripts/download_mnist_pgms.py | 2 +- .../simple_progress_monitor.py | 59 +- 
.../build_engine.py | 132 +- .../compare_tf.py | 289 ++-- .../create_onnx.py | 675 +++++++-- .../eval_coco.py | 119 +- .../image_batcher.py | 38 +- .../tensorflow_object_detection_api/infer.py | 137 +- .../onnx_utils.py | 87 +- .../visualize.py | 237 ++- samples/python/yolov3_onnx/data_processing.py | 27 +- .../python/yolov3_onnx/onnx_to_tensorrt.py | 69 +- samples/python/yolov3_onnx/yolov3_to_onnx.py | 112 +- .../sampleAlgorithmSelector/CMakeLists.txt | 2 +- .../sampleAlgorithmSelector.cpp | 2 +- samples/sampleCharRNN/CMakeLists.txt | 2 +- samples/sampleCharRNN/sampleCharRNN.cpp | 2 +- samples/sampleDynamicReshape/CMakeLists.txt | 2 +- .../sampleDynamicReshape.cpp | 2 +- samples/sampleINT8API/CMakeLists.txt | 2 +- samples/sampleINT8API/sampleINT8API.cpp | 2 +- samples/sampleIOFormats/CMakeLists.txt | 7 +- samples/sampleIOFormats/sampleIOFormats.cpp | 307 ++-- samples/sampleNamedDimensions/CMakeLists.txt | 2 +- samples/sampleNamedDimensions/create_model.py | 2 +- .../sampleNamedDimensions.cpp | 2 +- samples/sampleNonZeroPlugin/README.md | 8 +- samples/sampleNonZeroPlugin/nonZeroKernel.cu | 40 +- samples/sampleNonZeroPlugin/nonZeroKernel.h | 8 +- .../sampleNonZeroPlugin.cpp | 79 +- samples/sampleOnnxMNIST/CMakeLists.txt | 2 +- samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp | 2 +- .../sampleOnnxMnistCoordConvAC/CMakeLists.txt | 2 +- .../sampleOnnxMnistCoordConvAC/coord_conv.py | 2 +- .../mnist_coord_conv_train.py | 2 +- .../modify_onnx_ac.py | 2 +- .../sampleOnnxMnistCoordConvAC.cpp | 2 +- samples/sampleProgressMonitor/CMakeLists.txt | 2 +- .../sampleProgressMonitor.cpp | 2 +- samples/trtexec/CMakeLists.txt | 2 +- samples/trtexec/prn_utils.py | 2 +- samples/trtexec/profiler.py | 2 +- samples/trtexec/tracer.py | 2 +- samples/trtexec/trtexec.cpp | 12 +- samples/utils/fileLock.cpp | 2 +- samples/utils/fileLock.h | 2 +- samples/utils/timingCache.cpp | 2 +- samples/utils/timingCache.h | 2 +- scripts/convert_te_onnx_to_trt_onnx.py | 2 +- scripts/copyright-scan.py | 2 +- 
scripts/stubify.sh | 2 +- third_party/ieee/half.h | 2 +- third_party/protobuf.cmake | 9 +- tools/Polygraphy/CHANGELOG.md | 9 + tools/Polygraphy/Makefile | 2 +- tools/Polygraphy/docs/conf.py | 19 +- tools/Polygraphy/docs/requirements.txt | 6 +- .../build_and_run.py | 10 +- .../load_and_run.py | 2 +- .../api/01_comparing_frameworks/example.py | 8 +- .../api/02_validating_on_a_dataset/example.py | 4 +- .../example.py | 10 +- .../example.py | 13 +- .../05_using_tensorrt_network_api/example.py | 14 +- .../06_immediate_eval_api/build_and_run.py | 10 +- .../api/06_immediate_eval_api/load_and_run.py | 2 +- .../07_tensorrt_and_dynamic_shapes/example.py | 17 +- .../example.py | 2 +- .../example.py | 13 +- .../data_loader.py | 6 +- .../01_match_and_replace_plugin/README.md | 15 +- .../plugins/toyPlugin/__init__.py | 0 .../plugins/toyPlugin/pattern.py | 29 +- ...hing_toy_plugin.onnx => toy_subgraph.onnx} | Bin .../create_config.py | 2 +- .../define_network.py | 3 +- .../data_loader.py | 7 +- .../generate_data.py | 2 +- .../add_constraints.py | 2 +- .../constrained_network.py | 2 +- .../polygraphy_reshape_destroyer/__init__.py | 2 +- .../args/__init__.py | 2 +- .../args/loader.py | 14 +- .../args/runner.py | 10 +- .../backend/__init__.py | 2 +- .../backend/loader.py | 6 +- .../backend/runner.py | 6 +- .../polygraphy_reshape_destroyer/export.py | 3 +- .../extension_module/setup.py | 2 +- tools/Polygraphy/polygraphy/__init__.py | 2 +- .../polygraphy/backend/base/loader.py | 2 +- .../polygraphy/backend/base/runner.py | 19 +- .../polygraphy/backend/base/util.py | 6 +- .../polygraphy/backend/common/loader.py | 2 +- .../polygraphy/backend/onnx/loader.py | 128 +- .../polygraphy/backend/onnx/util.py | 78 +- .../polygraphy/backend/onnxrt/loader.py | 6 +- .../polygraphy/backend/onnxrt/runner.py | 2 +- .../backend/pluginref/references.py | 6 +- .../polygraphy/backend/pluginref/runner.py | 10 +- .../polygraphy/backend/pyt/runner.py | 6 +- .../polygraphy/backend/tf/loader.py | 54 +- 
.../polygraphy/backend/tf/runner.py | 7 +- .../Polygraphy/polygraphy/backend/tf/util.py | 30 +- .../polygraphy/backend/trt/__init__.py | 1 + .../backend/trt/algorithm_selector.py | 2 +- .../polygraphy/backend/trt/calibrator.py | 55 +- .../polygraphy/backend/trt/config.py | 2 +- .../polygraphy/backend/trt/file_reader.py | 80 + .../polygraphy/backend/trt/loader.py | 174 ++- .../polygraphy/backend/trt/profile.py | 14 +- .../polygraphy/backend/trt/runner.py | 46 +- .../Polygraphy/polygraphy/backend/trt/util.py | 10 +- .../Polygraphy/polygraphy/common/interface.py | 2 +- tools/Polygraphy/polygraphy/common/struct.py | 10 +- .../polygraphy/comparator/comparator.py | 66 +- .../polygraphy/comparator/compare.py | 2 +- .../polygraphy/comparator/data_loader.py | 16 +- .../polygraphy/comparator/postprocess.py | 2 +- .../polygraphy/comparator/struct.py | 23 +- .../Polygraphy/polygraphy/comparator/util.py | 39 +- tools/Polygraphy/polygraphy/config.py | 14 +- tools/Polygraphy/polygraphy/constants.py | 2 +- tools/Polygraphy/polygraphy/cuda/cuda.py | 38 +- .../polygraphy/datatype/datatype.py | 2 +- tools/Polygraphy/polygraphy/datatype/numpy.py | 8 +- tools/Polygraphy/polygraphy/datatype/onnx.py | 14 +- .../Polygraphy/polygraphy/datatype/onnxrt.py | 8 +- .../polygraphy/datatype/tensorrt.py | 2 +- tools/Polygraphy/polygraphy/datatype/torch.py | 8 +- .../polygraphy/exception/exception.py | 2 +- tools/Polygraphy/polygraphy/func/func.py | 19 +- tools/Polygraphy/polygraphy/json/serde.py | 12 +- tools/Polygraphy/polygraphy/logger/logger.py | 10 +- tools/Polygraphy/polygraphy/mod/exporter.py | 69 +- tools/Polygraphy/polygraphy/mod/importer.py | 5 +- tools/Polygraphy/polygraphy/mod/util.py | 2 +- .../tools/args/backend/onnx/loader.py | 149 +- .../tools/args/backend/onnxrt/loader.py | 11 +- .../tools/args/backend/onnxrt/runner.py | 8 +- .../tools/args/backend/pluginref/runner.py | 10 +- .../tools/args/backend/runner_select.py | 6 +- .../tools/args/backend/tf/config.py | 16 +- 
.../tools/args/backend/tf/loader.py | 28 +- .../tools/args/backend/tf/runner.py | 9 +- .../tools/args/backend/trt/config.py | 197 ++- .../tools/args/backend/trt/loader.py | 195 ++- .../tools/args/backend/trt/runner.py | 25 +- .../Polygraphy/polygraphy/tools/args/base.py | 6 +- .../tools/args/comparator/comparator.py | 79 +- .../tools/args/comparator/compare.py | 23 +- .../tools/args/comparator/data_loader.py | 66 +- .../tools/args/comparator/postprocess.py | 10 +- .../polygraphy/tools/args/logger/logger.py | 14 +- .../Polygraphy/polygraphy/tools/args/model.py | 30 +- .../polygraphy/tools/args/util/util.py | 41 +- .../Polygraphy/polygraphy/tools/base/tool.py | 18 +- .../polygraphy/tools/check/check.py | 2 +- .../polygraphy/tools/check/subtool/lint.py | 113 +- .../polygraphy/tools/convert/convert.py | 22 +- .../Polygraphy/polygraphy/tools/data/data.py | 2 +- .../polygraphy/tools/data/subtool/to_input.py | 12 +- .../polygraphy/tools/debug/debug.py | 2 +- .../polygraphy/tools/debug/subtool/base.py | 31 +- .../polygraphy/tools/debug/subtool/build.py | 2 +- .../debug/subtool/iterative_debug_args.py | 76 +- .../tools/debug/subtool/precision.py | 66 +- .../polygraphy/tools/debug/subtool/reduce.py | 106 +- .../polygraphy/tools/debug/subtool/repeat.py | 14 +- .../polygraphy/tools/inspect/inspect.py | 11 +- .../tools/inspect/subtool/capability.py | 137 +- .../polygraphy/tools/inspect/subtool/data.py | 22 +- .../tools/inspect/subtool/diff_tactics.py | 18 +- .../polygraphy/tools/inspect/subtool/model.py | 37 +- .../tools/inspect/subtool/sparsity.py | 14 +- .../tools/inspect/subtool/tactics.py | 2 +- .../polygraphy/tools/plugin/plugin.py | 2 +- .../tools/plugin/subtool/list_plugins.py | 10 +- .../polygraphy/tools/plugin/subtool/match.py | 7 +- .../tools/plugin/subtool/plugin_base.py | 131 +- .../tools/plugin/subtool/replace.py | 113 +- tools/Polygraphy/polygraphy/tools/registry.py | 14 +- tools/Polygraphy/polygraphy/tools/run/run.py | 19 +- 
tools/Polygraphy/polygraphy/tools/script.py | 59 +- tools/Polygraphy/polygraphy/tools/sparse.py | 35 +- .../polygraphy/tools/surgeon/subtool/base.py | 2 +- .../tools/surgeon/subtool/extract.py | 51 +- .../tools/surgeon/subtool/insert.py | 35 +- .../polygraphy/tools/surgeon/subtool/prune.py | 14 +- .../tools/surgeon/subtool/sanitize.py | 31 +- .../polygraphy/tools/surgeon/surgeon.py | 11 +- .../polygraphy/tools/template/subtool/base.py | 2 +- .../tools/template/subtool/onnx_gs.py | 10 +- .../tools/template/subtool/trt_config.py | 7 +- .../tools/template/subtool/trt_network.py | 11 +- .../polygraphy/tools/template/template.py | 2 +- tools/Polygraphy/polygraphy/tools/util.py | 6 +- tools/Polygraphy/polygraphy/util/array.py | 8 +- tools/Polygraphy/polygraphy/util/util.py | 72 +- tools/Polygraphy/setup.py | 2 +- .../tests/backend/base/test_loader.py | 2 +- .../tests/backend/base/test_runner.py | 2 +- .../tests/backend/common/test_loader.py | 6 +- .../tests/backend/onnx/test_loader.py | 36 +- .../tests/backend/onnx/test_util.py | 10 +- .../tests/backend/onnxrt/test_loader.py | 7 +- .../tests/backend/onnxrt/test_runner.py | 9 +- .../tests/backend/pluginref/test_runner.py | 21 +- .../tests/backend/tf/test_loader.py | 17 +- .../tests/backend/tf/test_runner.py | 23 +- .../backend/trt/test_algorithm_selector.py | 2 +- .../tests/backend/trt/test_calibrator.py | 16 +- .../tests/backend/trt/test_loader.py | 63 +- .../tests/backend/trt/test_profile.py | 20 +- .../tests/backend/trt/test_runner.py | 149 +- .../Polygraphy/tests/backend/trt/test_util.py | 2 +- .../Polygraphy/tests/common/test_datatype.py | 2 +- .../Polygraphy/tests/common/test_interface.py | 2 +- tools/Polygraphy/tests/common/test_struct.py | 3 +- .../tests/comparator/test_comparator.py | 43 +- .../tests/comparator/test_compare.py | 2 +- .../tests/comparator/test_data_loader.py | 2 +- .../tests/comparator/test_postprocess.py | 3 +- .../tests/comparator/test_struct.py | 10 +- tools/Polygraphy/tests/conftest.py | 24 +- 
tools/Polygraphy/tests/cuda/test_cuda.py | 2 +- tools/Polygraphy/tests/func/test_func.py | 13 +- tools/Polygraphy/tests/helper.py | 11 +- tools/Polygraphy/tests/logger/test_logger.py | 16 +- tools/Polygraphy/tests/mod/conftest.py | 2 +- .../Polygraphy/tests/mod/test_dependencies.py | 16 +- tools/Polygraphy/tests/mod/test_exporter.py | 11 +- tools/Polygraphy/tests/mod/test_importer.py | 39 +- tools/Polygraphy/tests/mod/test_util.py | 2 +- tools/Polygraphy/tests/models/make_models.py | 220 ++- tools/Polygraphy/tests/models/meta.py | 274 +++- .../tests/models/plugins/toyPlugin/pattern.py | 29 +- ...hing_toy_plugin.onnx => toy_subgraph.onnx} | Bin .../tests/test_deprecated_aliases.py | 2 +- tools/Polygraphy/tests/test_examples.py | 5 +- tools/Polygraphy/tests/test_packaging.py | 14 +- tools/Polygraphy/tests/test_tests.py | 10 +- tools/Polygraphy/tests/test_ux.py | 6 +- .../tools/args/backend/onnx/test_loader.py | 97 +- .../tools/args/backend/onnxrt/test_loader.py | 9 +- .../tools/args/backend/test_runner_select.py | 8 +- .../tools/args/backend/tf/test_loader.py | 2 +- .../tools/args/backend/trt/test_config.py | 10 +- .../tools/args/backend/trt/test_loader.py | 12 +- .../tools/args/backend/trt/test_runner.py | 6 +- .../tools/args/comparator/test_comparator.py | 12 +- .../tools/args/comparator/test_compare.py | 40 +- .../tools/args/comparator/test_data_loader.py | 59 +- tools/Polygraphy/tests/tools/args/helper.py | 2 +- .../tests/tools/args/logger/test_logger.py | 14 +- .../tests/tools/args/test_docstrings.py | 22 +- .../Polygraphy/tests/tools/args/test_model.py | 36 +- .../tests/tools/args/util/test_util.py | 6 +- tools/Polygraphy/tests/tools/conftest.py | 3 +- .../tests/tools/fake_reduce_checker.py | 16 +- tools/Polygraphy/tests/tools/test_check.py | 78 +- tools/Polygraphy/tests/tools/test_convert.py | 63 +- tools/Polygraphy/tests/tools/test_data.py | 11 +- tools/Polygraphy/tests/tools/test_debug.py | 43 +- .../Polygraphy/tests/tools/test_deprecated.py | 2 +- 
tools/Polygraphy/tests/tools/test_inspect.py | 22 +- tools/Polygraphy/tests/tools/test_plugin.py | 8 +- .../Polygraphy/tests/tools/test_polygraphy.py | 2 +- tools/Polygraphy/tests/tools/test_run.py | 301 +++- tools/Polygraphy/tests/tools/test_script.py | 13 +- tools/Polygraphy/tests/tools/test_surgeon.py | 207 ++- tools/Polygraphy/tests/tools/test_template.py | 6 +- tools/Polygraphy/tests/util/test_array.py | 12 +- tools/Polygraphy/tests/util/test_serde.py | 2 +- tools/Polygraphy/tests/util/test_util.py | 73 +- .../trt-engine-explorer/trex/graphing.py | 1 + tools/onnx-graphsurgeon/CHANGELOG.md | 4 + .../examples/12_using_bf16/README.md | 26 + .../examples/12_using_bf16/generate.py | 29 +- .../examples/resources/12_bf16.onnx.png | Bin 0 -> 36504 bytes .../exporters/onnx_exporter.py | 69 +- .../onnx_graphsurgeon/ir/tensor.py | 54 +- .../onnx-graphsurgeon/tests/test_examples.py | 3 + tools/onnx-graphsurgeon/tests/test_ir.py | 10 +- 853 files changed, 11169 insertions(+), 12121 deletions(-) delete mode 100644 demo/Diffusion/calibration.py create mode 100644 demo/Diffusion/utils_ammo.py delete mode 100644 demo/Jasper/README.md delete mode 100644 demo/Tacotron2/README.md delete mode 100644 demo/Tacotron2/common/audio_processing.py delete mode 100644 demo/Tacotron2/common/layers.py delete mode 100644 demo/Tacotron2/common/stft.py delete mode 100644 demo/Tacotron2/common/utils.py delete mode 100644 demo/Tacotron2/config.json delete mode 100644 demo/Tacotron2/data_functions.py delete mode 100644 demo/Tacotron2/inference.py delete mode 100644 demo/Tacotron2/inference_perf.py delete mode 100644 demo/Tacotron2/main.py delete mode 100644 demo/Tacotron2/models.py delete mode 100644 demo/Tacotron2/multiproc.py delete mode 100644 demo/Tacotron2/phrases/phrase.txt delete mode 100644 demo/Tacotron2/phrases/phrase_1_128.txt delete mode 100644 demo/Tacotron2/phrases/phrase_1_256.txt delete mode 100644 demo/Tacotron2/phrases/phrase_1_64.txt delete mode 100644 
demo/Tacotron2/phrases/phrase_4_256.txt delete mode 100644 demo/Tacotron2/phrases/phrase_4_64.txt delete mode 100644 demo/Tacotron2/phrases/phrase_8_256.txt delete mode 100644 demo/Tacotron2/phrases/phrase_8_64.txt delete mode 100644 demo/Tacotron2/preprocess_audio2mel.py delete mode 100644 demo/Tacotron2/requirements.txt delete mode 100644 demo/Tacotron2/run_latency_tests.sh delete mode 100755 demo/Tacotron2/scripts/download_checkpoints.sh delete mode 100755 demo/Tacotron2/scripts/inference_benchmark.sh delete mode 100755 demo/Tacotron2/scripts/install_prerequisites.sh delete mode 100755 demo/Tacotron2/scripts/prepare_dataset.sh delete mode 100644 demo/Tacotron2/scripts/prepare_mels.sh delete mode 100644 demo/Tacotron2/tacotron2/arg_parser.py delete mode 100644 demo/Tacotron2/tacotron2/data_function.py delete mode 100644 demo/Tacotron2/tacotron2/loss_function.py delete mode 100644 demo/Tacotron2/tacotron2/model.py delete mode 100644 demo/Tacotron2/tacotron2/text/LICENCE delete mode 100644 demo/Tacotron2/tacotron2/text/__init__.py delete mode 100644 demo/Tacotron2/tacotron2/text/cleaners.py delete mode 100644 demo/Tacotron2/tacotron2/text/cmudict.py delete mode 100644 demo/Tacotron2/tacotron2/text/numbers.py delete mode 100644 demo/Tacotron2/tacotron2/text/symbols.py delete mode 100644 demo/Tacotron2/tensorrt/convert_onnx2trt.py delete mode 100644 demo/Tacotron2/tensorrt/convert_tacotron22onnx.py delete mode 100644 demo/Tacotron2/tensorrt/convert_waveglow2onnx.py delete mode 100644 demo/Tacotron2/tensorrt/generate_decoder.py delete mode 100644 demo/Tacotron2/tensorrt/inference_trt.py delete mode 100644 demo/Tacotron2/tensorrt/run_latency_tests_trt.sh delete mode 100644 demo/Tacotron2/tensorrt/test_infer_trt.py delete mode 100644 demo/Tacotron2/tensorrt/trt_utils.py delete mode 100644 demo/Tacotron2/test_infer.py delete mode 100644 demo/Tacotron2/test_infer.sh delete mode 100644 demo/Tacotron2/train.py delete mode 100644 demo/Tacotron2/waveglow/arg_parser.py delete 
mode 100644 demo/Tacotron2/waveglow/data_function.py delete mode 100644 demo/Tacotron2/waveglow/denoiser.py delete mode 100644 demo/Tacotron2/waveglow/loss_function.py delete mode 100644 demo/Tacotron2/waveglow/model.py delete mode 100644 demo/experimental/HuggingFace-Diffusers/README.md delete mode 100644 demo/experimental/HuggingFace-Diffusers/TensorRT-diffusers-txt2img.ipynb create mode 100644 docker/rockylinux8.Dockerfile create mode 100644 docker/rockylinux9.Dockerfile create mode 100644 docker/ubuntu-22.04-aarch64.Dockerfile create mode 100644 docker/ubuntu-cross-aarch64.Dockerfile delete mode 100644 tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/plugins/toyPlugin/__init__.py rename tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/{graph_with_subgraph_matching_toy_plugin.onnx => toy_subgraph.onnx} (100%) create mode 100644 tools/Polygraphy/polygraphy/backend/trt/file_reader.py rename tools/Polygraphy/tests/models/{graph_with_subgraph_matching_toy_plugin.onnx => toy_subgraph.onnx} (100%) create mode 100644 tools/onnx-graphsurgeon/examples/12_using_bf16/README.md rename demo/Tacotron2/loss_functions.py => tools/onnx-graphsurgeon/examples/12_using_bf16/generate.py (52%) create mode 100644 tools/onnx-graphsurgeon/examples/resources/12_bf16.onnx.png diff --git a/CHANGELOG.md b/CHANGELOG.md index 66139b50..0ef9e135 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,23 @@ # TensorRT OSS Release Changelog -## 10.0.0 EA - 2024-04-02 +## 10.0.1 GA - 2024-04-30 + +Key Features and Updates: + + - Parser changes + - Added support for building with `protobuf-lite`. + - Fixed issue when parsing and refitting models with nested `BatchNormalization` nodes. + - Added support for empty inputs in custom plugin nodes. 
+ - Demo changes + - The following demos have been removed: Jasper, Tacotron2, HuggingFace Diffusers notebook + - Updated tooling + - Polygraphy v0.49.10 + - ONNX-GraphSurgeon v0.5.2 + - Build Containers + - Updated default cuda versions to `12.4.0`. + - Added Rocky Linux 8 and Rocky Linux 9 build containers + +## 10.0.0 EA - 2024-03-27 Key Features and Updates: diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d29b78e..a1f072a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -143,7 +143,20 @@ if(BUILD_PARSERS) configure_protobuf(${PROTOBUF_VERSION}) endif() -find_library_create_target(nvinfer nvinfer SHARED ${TRT_LIB_DIR}) +# Windows library names have major version appended. 
+if (MSVC) + set(nvinfer_lib_name "nvinfer_${TRT_SOVERSION}") + set(nvinfer_plugin_lib_name "nvinfer_plugin_${TRT_SOVERSION}") + set(nvinfer_vc_plugin_lib_name "nvinfer_vc_plugin_${TRT_SOVERSION}") + set(nvonnxparser_lib_name "nvonnxparser_${TRT_SOVERSION}") +else() + set(nvinfer_lib_name "nvinfer") + set(nvinfer_plugin_lib_name "nvinfer_plugin") + set(nvinfer_vc_plugin_lib_name "nvinfer_vc_plugin") + set(nvonnxparser_lib_name "nvonnxparser") +endif() + +find_library_create_target(nvinfer ${nvinfer_lib_name} SHARED ${TRT_LIB_DIR}) find_library(CUDART_LIB cudart_static HINTS ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib/x64 lib64) @@ -165,7 +178,16 @@ else() 75 ) - string(REGEX MATCH "aarch64" IS_ARM "${TRT_PLATFORM_ID}") + find_file(IS_L4T_NATIVE nv_tegra_release PATHS /env/) + set (IS_L4T_CROSS "False") + if (DEFINED ENV{IS_L4T_CROSS}) + set(IS_L4T_CROSS $ENV{IS_L4T_CROSS}) + endif() + + if (IS_L4T_NATIVE OR ${IS_L4T_CROSS} STREQUAL "True") + # Only Orin (SM87) supported + list(APPEND GPU_ARCHS 87) + endif() if (CUDA_VERSION VERSION_GREATER_EQUAL 11.0) # Ampere GPU (SM80) support is only available in CUDA versions > 11.0 @@ -206,13 +228,13 @@ endif() if(BUILD_PLUGINS) add_subdirectory(plugin) else() - find_library_create_target(nvinfer_plugin nvinfer_plugin SHARED ${TRT_OUT_DIR} ${TRT_LIB_DIR}) + find_library_create_target(nvinfer_plugin ${nvinfer_plugin_lib_name} SHARED ${TRT_OUT_DIR} ${TRT_LIB_DIR}) endif() if(BUILD_PARSERS) add_subdirectory(parsers) else() - find_library_create_target(nvonnxparser nvonnxparser SHARED ${TRT_OUT_DIR} ${TRT_LIB_DIR}) + find_library_create_target(nvonnxparser ${nvonnxparser_lib_name} SHARED ${TRT_OUT_DIR} ${TRT_LIB_DIR}) endif() if(BUILD_SAMPLES) diff --git a/README.md b/README.md index 28a3edba..9e2bf7b9 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ You can skip the **Build** section to enjoy TensorRT with Python. To build the TensorRT-OSS components, you will first need the following software packages. 
**TensorRT GA build** -* TensorRT v10.0.0.6 +* TensorRT v10.0.1.6 * Available from direct download links listed below **System Packages** @@ -73,16 +73,16 @@ To build the TensorRT-OSS components, you will first need the following software If using the TensorRT OSS build container, TensorRT libraries are preinstalled under `/usr/lib/x86_64-linux-gnu` and you may skip this step. Else download and extract the TensorRT GA build from [NVIDIA Developer Zone](https://developer.nvidia.com) with the direct links below: - - [TensorRT 10.0.0.6 for CUDA 11.8, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/tars/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-11.8.tar.gz) - - [TensorRT 10.0.0.6 for CUDA 12.4, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/tars/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz) + - [TensorRT 10.0.1.6 for CUDA 11.8, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz) + - [TensorRT 10.0.1.6 for CUDA 12.4, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz) **Example: Ubuntu 20.04 on x86-64 with cuda-12.4** ```bash cd ~/Downloads - tar -xvzf TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz - export TRT_LIBPATH=`pwd`/TensorRT-10.0.0.6 + tar -xvzf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz + export TRT_LIBPATH=`pwd`/TensorRT-10.0.1.6 ``` ## Setting Up The Build Environment @@ -92,16 +92,27 @@ For Linux platforms, we recommend that you generate a docker container for build 1. #### Generate the TensorRT-OSS build container. The TensorRT-OSS build container can be generated using the supplied Dockerfiles and build scripts. The build containers are configured for building TensorRT OSS out-of-the-box. 
- **Example: Ubuntu 20.04 on x86-64 with cuda-12.3.2 (default)** + **Example: Ubuntu 20.04 on x86-64 with cuda-12.4 (default)** ```bash - ./docker/build.sh --file docker/ubuntu-20.04.Dockerfile --tag tensorrt-ubuntu20.04-cuda12.3.2 + ./docker/build.sh --file docker/ubuntu-20.04.Dockerfile --tag tensorrt-ubuntu20.04-cuda12.4 + ``` + **Example: Rockylinux8 on x86-64 with cuda-12.4** + ```bash + ./docker/build.sh --file docker/rockylinux8.Dockerfile --tag tensorrt-rockylinux8-cuda12.4 + ``` + **Example: Ubuntu 22.04 cross-compile for Jetson (aarch64) with cuda-12.4 (JetPack SDK)** + ```bash + ./docker/build.sh --file docker/ubuntu-cross-aarch64.Dockerfile --tag tensorrt-jetpack-cuda12.4 + ``` + **Example: Ubuntu 22.04 on aarch64 with cuda-12.4** + ```bash + ./docker/build.sh --file docker/ubuntu-22.04-aarch64.Dockerfile --tag tensorrt-aarch64-ubuntu22.04-cuda12.4 ``` - 2. #### Launch the TensorRT-OSS build container. **Example: Ubuntu 20.04 build container** ```bash - ./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda12.3.2 --gpus all + ./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda12.4 --gpus all ``` > NOTE:
1. Use the `--tag` corresponding to build container generated in Step 1. @@ -112,13 +123,36 @@ For Linux platforms, we recommend that you generate a docker container for build ## Building TensorRT-OSS * Generate Makefiles and build. - **Example: Linux (x86-64) build with default cuda-12.3.2** + **Example: Linux (x86-64) build with default cuda-12.4** ```bash cd $TRT_OSSPATH mkdir -p build && cd build cmake .. -DTRT_LIB_DIR=$TRT_LIBPATH -DTRT_OUT_DIR=`pwd`/out make -j$(nproc) ``` + **Example: Linux (aarch64) build with default cuda-12.4** + ```bash + cd $TRT_OSSPATH + mkdir -p build && cd build + cmake .. -DTRT_LIB_DIR=$TRT_LIBPATH -DTRT_OUT_DIR=`pwd`/out -DCMAKE_TOOLCHAIN_FILE=$TRT_OSSPATH/cmake/toolchains/cmake_aarch64-native.toolchain + make -j$(nproc) + ``` + **Example: Native build on Jetson (aarch64) with cuda-12.4** + ```bash + cd $TRT_OSSPATH + mkdir -p build && cd build + cmake .. -DTRT_LIB_DIR=$TRT_LIBPATH -DTRT_OUT_DIR=`pwd`/out -DTRT_PLATFORM_ID=aarch64 -DCUDA_VERSION=12.4 + CC=/usr/bin/gcc make -j$(nproc) + ``` + > NOTE: C compiler must be explicitly specified via CC= for native aarch64 builds of protobuf. + + **Example: Ubuntu 22.04 Cross-Compile for Jetson (aarch64) with cuda-12.4 (JetPack)** + ```bash + cd $TRT_OSSPATH + mkdir -p build && cd build + cmake .. -DCMAKE_TOOLCHAIN_FILE=$TRT_OSSPATH/cmake/toolchains/cmake_aarch64.toolchain -DCUDA_VERSION=12.4 -DCUDNN_LIB=/pdk_files/cudnn/usr/lib/aarch64-linux-gnu/libcudnn.so -DCUBLAS_LIB=/usr/local/cuda-12.4/targets/aarch64-linux/lib/stubs/libcublas.so -DCUBLASLT_LIB=/usr/local/cuda-12.4/targets/aarch64-linux/lib/stubs/libcublasLt.so -DTRT_LIB_DIR=/pdk_files/tensorrt/lib + make -j$(nproc) + ``` > NOTE:
1. The default CUDA version used by CMake is 12.2.0. To override this, for example to 11.8, append `-DCUDA_VERSION=11.8` to the cmake command. diff --git a/VERSION b/VERSION index efdce495..db243822 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -10.0.0.6 +10.0.1.6 diff --git a/cmake/modules/find_library_create_target.cmake b/cmake/modules/find_library_create_target.cmake index a1d29efb..49441847 100644 --- a/cmake/modules/find_library_create_target.cmake +++ b/cmake/modules/find_library_create_target.cmake @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -25,9 +25,6 @@ macro(find_library_create_target target_name lib libtype hints) find_library(${lib}_LIB_PATH ${lib}) message(STATUS "Library that was found ${${lib}_LIB_PATH}") add_library(${target_name} ${libtype} IMPORTED) - set_property(TARGET ${target_name} PROPERTY IMPORTED_LOCATION ${${lib}_LIB_PATH}) # This should be .so or .dll file, currently its .a or .lib. - if (WIN32) - set_property(TARGET ${target_name} PROPERTY IMPORTED_IMPLIB ${${lib}_LIB_PATH}) # This should be a .lib file - endif() + set_property(TARGET ${target_name} PROPERTY IMPORTED_LOCATION ${${lib}_LIB_PATH}) message(STATUS "==========================================================================================") endmacro() diff --git a/cmake/modules/set_ifndef.cmake b/cmake/modules/set_ifndef.cmake index fbdc9be1..85d769e9 100644 --- a/cmake/modules/set_ifndef.cmake +++ b/cmake/modules/set_ifndef.cmake @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/cmake/toolchains/cmake_aarch64-android.toolchain b/cmake/toolchains/cmake_aarch64-android.toolchain index 87e490f6..ec768aa4 100644 --- a/cmake/toolchains/cmake_aarch64-android.toolchain +++ b/cmake/toolchains/cmake_aarch64-android.toolchain @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/cmake/toolchains/cmake_aarch64-native.toolchain b/cmake/toolchains/cmake_aarch64-native.toolchain index fd4e30cc..bd49c9bb 100644 --- a/cmake/toolchains/cmake_aarch64-native.toolchain +++ b/cmake/toolchains/cmake_aarch64-native.toolchain @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/cmake/toolchains/cmake_aarch64.toolchain b/cmake/toolchains/cmake_aarch64.toolchain index 3c87fd65..020a1066 100644 --- a/cmake/toolchains/cmake_aarch64.toolchain +++ b/cmake/toolchains/cmake_aarch64.toolchain @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,6 +19,8 @@ set(CMAKE_SYSTEM_NAME Linux) set(CMAKE_SYSTEM_PROCESSOR aarch64) set(TRT_PLATFORM_ID "aarch64") +set(CMAKE_FIND_LIBRARY_PREFIXES "lib") +set(CMAKE_FIND_LIBRARY_SUFFIXES .so) if("$ENV{ARMSERVER}" AND "${CUDA_VERSION}" VERSION_GREATER_EQUAL 11.0) set(CUDA_PLATFORM_ID "sbsa-linux") @@ -46,10 +48,18 @@ set(BUILD_LIBRARY_ONLY 1) set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_ROOT}) set(CUDA_INCLUDE_DIRS ${CUDA_ROOT}/include) +set(CMAKE_THREAD_LIBS_INIT "-lpthread") +set(CMAKE_HAVE_THREADS_LIBRARY 1) +set(CMAKE_USE_WIN32_THREADS_INIT 0) +set(CMAKE_USE_PTHREADS_INIT 1) + find_library(RT_LIB rt PATHS /usr/aarch64-linux-gnu/lib /usr/lib/aarch64-linux-gnu) if(NOT RT_LIB) - message(WARNING "librt.so not found in default paths") + find_file(RT_LIB librt.so PATHS /usr/aarch64-linux-gnu/lib /usr/lib/aarch64-linux-gnu) + if(NOT RT_LIB) + message(WARNING "librt.so not found in default paths") + endif() endif() message("RT_LIB: ${RT_LIB}") diff --git a/cmake/toolchains/cmake_aarch64_cross.toolchain b/cmake/toolchains/cmake_aarch64_cross.toolchain index 177a82f9..844fdd89 100644 --- a/cmake/toolchains/cmake_aarch64_cross.toolchain +++ b/cmake/toolchains/cmake_aarch64_cross.toolchain @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/cmake/toolchains/cmake_ppc64le.toolchain b/cmake/toolchains/cmake_ppc64le.toolchain index 074c3fb0..2d6272f5 100644 --- a/cmake/toolchains/cmake_ppc64le.toolchain +++ b/cmake/toolchains/cmake_ppc64le.toolchain @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/cmake/toolchains/cmake_qnx.toolchain b/cmake/toolchains/cmake_qnx.toolchain index 95f337a8..60b36163 100644 --- a/cmake/toolchains/cmake_qnx.toolchain +++ b/cmake/toolchains/cmake_qnx.toolchain @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/cmake/toolchains/cmake_x64_win.toolchain b/cmake/toolchains/cmake_x64_win.toolchain index 5dad0ce7..87b04f5f 100644 --- a/cmake/toolchains/cmake_x64_win.toolchain +++ b/cmake/toolchains/cmake_x64_win.toolchain @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/cmake/toolchains/cmake_x86_64.toolchain b/cmake/toolchains/cmake_x86_64.toolchain index 8d452945..daf336ef 100644 --- a/cmake/toolchains/cmake_x86_64.toolchain +++ b/cmake/toolchains/cmake_x86_64.toolchain @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/cmake/toolchains/cmake_x86_64_agnostic.toolchain b/cmake/toolchains/cmake_x86_64_agnostic.toolchain index 8253d8f1..91c03095 100644 --- a/cmake/toolchains/cmake_x86_64_agnostic.toolchain +++ b/cmake/toolchains/cmake_x86_64_agnostic.toolchain @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/CMakeLists.txt b/demo/BERT/CMakeLists.txt index cc2c8fc9..94639130 100644 --- a/demo/BERT/CMakeLists.txt +++ b/demo/BERT/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/README.md b/demo/BERT/README.md index f867a321..27d141f5 100755 --- a/demo/BERT/README.md +++ b/demo/BERT/README.md @@ -73,9 +73,9 @@ The following software version configuration has been tested: |Software|Version| |--------|-------| -|Python|>=3.6| -|TensorRT|8.5.1| -|CUDA|11.6| +|Python|>=3.8| +|TensorRT|10.0.1.6| +|CUDA|12.4| ## Setup diff --git a/demo/BERT/builder.py b/demo/BERT/builder.py index 5eafe367..c5f21b0a 100755 --- a/demo/BERT/builder.py +++ b/demo/BERT/builder.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -43,7 +43,7 @@ trt_version = [n for n in trt.__version__.split('.')] # Import necessary plugins for demoBERT -plugin_lib_name = "nvinfer_plugin.dll" if sys.platform == "win32" else "libnvinfer_plugin.so" +plugin_lib_name = "nvinfer_plugin_10.dll" if sys.platform == "win32" else "libnvinfer_plugin.so" env_name_to_add_path = "PATH" if sys.platform == "win32" else "LD_LIBRARY_PATH" handle = ctypes.CDLL(plugin_lib_name, mode=ctypes.RTLD_GLOBAL) if not handle: diff --git a/demo/BERT/builder_utils.py b/demo/BERT/builder_utils.py index 248bee80..abf0f514 100644 --- a/demo/BERT/builder_utils.py +++ b/demo/BERT/builder_utils.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/builder_varseqlen.py b/demo/BERT/builder_varseqlen.py index ad25ef0c..66a9d571 100755 --- a/demo/BERT/builder_varseqlen.py +++ b/demo/BERT/builder_varseqlen.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -42,7 +42,7 @@ trt_version = [n for n in trt.__version__.split('.')] # Import necessary plugins for demoBERT -plugin_lib_name = "nvinfer_plugin.dll" if sys.platform == "win32" else "libnvinfer_plugin.so" +plugin_lib_name = "nvinfer_plugin_10.dll" if sys.platform == "win32" else "libnvinfer_plugin.so" env_name_to_add_path = "PATH" if sys.platform == "win32" else "LD_LIBRARY_PATH" handle = ctypes.CDLL(plugin_lib_name, mode=ctypes.RTLD_GLOBAL) if not handle: diff --git a/demo/BERT/helpers/calibrator.py b/demo/BERT/helpers/calibrator.py index beacc625..09e6014b 100644 --- a/demo/BERT/helpers/calibrator.py +++ b/demo/BERT/helpers/calibrator.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/helpers/data_processing.py b/demo/BERT/helpers/data_processing.py index 88459ebf..e7deae31 100644 --- a/demo/BERT/helpers/data_processing.py +++ b/demo/BERT/helpers/data_processing.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/helpers/tokenization.py b/demo/BERT/helpers/tokenization.py index 434f411d..9d3cb22d 100644 --- a/demo/BERT/helpers/tokenization.py +++ b/demo/BERT/helpers/tokenization.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/infer_c/bert_infer.h b/demo/BERT/infer_c/bert_infer.h index 2f72102a..d049877e 100644 --- a/demo/BERT/infer_c/bert_infer.h +++ b/demo/BERT/infer_c/bert_infer.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -152,15 +152,12 @@ struct BertInference mDeviceBuffers.emplace_back(devBuf); mHostOutput.resize(numOutputItems); - mBindings.resize(mEngine->getNbIOTensors() * mEngine->getNbOptimizationProfiles()); } void prepare(int profIdx, int batchSize) { - mContext->setOptimizationProfile(profIdx); - const int bindingIdxOffset = profIdx * mEngine->getNbIOTensors(); - std::copy(mDeviceBuffers.begin(), mDeviceBuffers.end(), mBindings.begin() + bindingIdxOffset); + mContext->setOptimizationProfileAsync(profIdx, mStream); if (mEnableVariableLen) { @@ -191,13 +188,13 @@ struct BertInference for (int32_t i = 0; i < mEngine->getNbIOTensors(); i++) { auto const& name = mEngine->getIOTensorName(i); - context->setTensorAddress(name, mBindings[i + bindingIdxOffset]); + mContext->setTensorAddress(name, mDeviceBuffers[i]); } cudaGraph_t graph; cudaGraphExec_t exec; // warm up and let mContext do cublas initialization - bool status = mContext->enqueueV3(mStream, nullptr); + bool status = mContext->enqueueV3(mStream); if (!status) { gLogError << "Enqueue failed\n"; @@ -206,7 +203,7 @@ struct BertInference gLogVerbose << "Capturing graph\n"; gpuErrChk(cudaStreamBeginCapture(mStream, cudaStreamCaptureModeRelaxed)); - status = mContext->enqueueV3(mStream, nullptr); + status 
= mContext->enqueueV3(mStream); if (!status) { gLogError << "Enqueue failed\n"; @@ -240,7 +237,7 @@ struct BertInference } else { - bool status = mContext->enqueueV3(mStream, nullptr); + bool status = mContext->enqueueV3(mStream); if (!status) { gLogError << "Enqueue failed\n"; @@ -265,7 +262,7 @@ struct BertInference } else { - bool status = mContext->enqueueV3(mStream, nullptr); + bool status = mContext->enqueueV3(mStream); if (!status) { gLogError << "Enqueue failed\n"; @@ -347,7 +344,6 @@ struct BertInference TrtUniquePtr mRuntime{nullptr}; TrtUniquePtr mEngine{nullptr}; TrtUniquePtr mContext{nullptr}; - std::vector mBindings; bool mEnableVariableLen; std::vector mCuSeqlens; diff --git a/demo/BERT/infer_c/common.h b/demo/BERT/infer_c/common.h index b5280e2a..da29944c 100644 --- a/demo/BERT/infer_c/common.h +++ b/demo/BERT/infer_c/common.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -169,7 +169,7 @@ struct TrtDestroyer { void operator()(T* t) { - t->destroy(); + delete t; } }; diff --git a/demo/BERT/infer_c/infer_c.cpp b/demo/BERT/infer_c/infer_c.cpp index b868a661..946ce663 100644 --- a/demo/BERT/infer_c/infer_c.cpp +++ b/demo/BERT/infer_c/infer_c.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/infer_c/logging.cpp b/demo/BERT/infer_c/logging.cpp index b6b14298..f651155c 100644 --- a/demo/BERT/infer_c/logging.cpp +++ b/demo/BERT/infer_c/logging.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/infer_c/logging.h b/demo/BERT/infer_c/logging.h index 2c36d039..2c137465 100644 --- a/demo/BERT/infer_c/logging.h +++ b/demo/BERT/infer_c/logging.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/infer_c/perf.cpp b/demo/BERT/infer_c/perf.cpp index bbc6de76..0208f2eb 100644 --- a/demo/BERT/infer_c/perf.cpp +++ b/demo/BERT/infer_c/perf.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/inference.py b/demo/BERT/inference.py index dc172181..aa0d0dd7 100644 --- a/demo/BERT/inference.py +++ b/demo/BERT/inference.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -121,7 +121,7 @@ def question_features(tokens, question): return dp.convert_example_to_features(tokens, question, tokenizer, max_seq_length, doc_stride, args.max_query_length) # Import necessary plugins for demoBERT - plugin_lib_name = "nvinfer_plugin.dll" if sys.platform == "win32" else "libnvinfer_plugin.so" + plugin_lib_name = "nvinfer_plugin_10.dll" if sys.platform == "win32" else "libnvinfer_plugin.so" env_name_to_add_path = "PATH" if sys.platform == "win32" else "LD_LIBRARY_PATH" handle = ctypes.CDLL(plugin_lib_name, mode=ctypes.RTLD_GLOBAL) if not handle: diff --git a/demo/BERT/inference_c.py b/demo/BERT/inference_c.py index e2bda9af..b10127bd 100644 --- a/demo/BERT/inference_c.py +++ b/demo/BERT/inference_c.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/inference_varseqlen.py b/demo/BERT/inference_varseqlen.py index 7eb87012..700ddcce 100644 --- a/demo/BERT/inference_varseqlen.py +++ b/demo/BERT/inference_varseqlen.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -120,7 +120,7 @@ def question_features(tokens, question): return dp.convert_example_to_features(tokens, question, tokenizer, max_seq_length, doc_stride, args.max_query_length) # Import necessary plugins for demoBERT - plugin_lib_name = "nvinfer_plugin.dll" if sys.platform == "win32" else "libnvinfer_plugin.so" + plugin_lib_name = "nvinfer_plugin_10.dll" if sys.platform == "win32" else "libnvinfer_plugin.so" env_name_to_add_path = "PATH" if sys.platform == "win32" else "LD_LIBRARY_PATH" handle = ctypes.CDLL(plugin_lib_name, mode=ctypes.RTLD_GLOBAL) if not handle: diff --git a/demo/BERT/perf.py b/demo/BERT/perf.py index 7b4e9da9..f3d2ab74 100644 --- a/demo/BERT/perf.py +++ b/demo/BERT/perf.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/perf_varseqlen.py b/demo/BERT/perf_varseqlen.py index 853201a4..6708f989 100644 --- a/demo/BERT/perf_varseqlen.py +++ b/demo/BERT/perf_varseqlen.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/squad/evaluate-v1.1.py b/demo/BERT/squad/evaluate-v1.1.py index c73db423..bde41564 100644 --- a/demo/BERT/squad/evaluate-v1.1.py +++ b/demo/BERT/squad/evaluate-v1.1.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/BERT/squad/evaluate-v2.0.py b/demo/BERT/squad/evaluate-v2.0.py index e36d3e9f..67518e3c 100644 --- a/demo/BERT/squad/evaluate-v2.0.py +++ b/demo/BERT/squad/evaluate-v2.0.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/DeBERTa/deberta_onnx_modify.py b/demo/DeBERTa/deberta_onnx_modify.py index 234c4659..f8fe61f5 100644 --- a/demo/DeBERTa/deberta_onnx_modify.py +++ b/demo/DeBERTa/deberta_onnx_modify.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/DeBERTa/deberta_ort_inference.py b/demo/DeBERTa/deberta_ort_inference.py index 17378989..05741733 100644 --- a/demo/DeBERTa/deberta_ort_inference.py +++ b/demo/DeBERTa/deberta_ort_inference.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/DeBERTa/deberta_pytorch2onnx.py b/demo/DeBERTa/deberta_pytorch2onnx.py index 51546b29..7745f0dc 100644 --- a/demo/DeBERTa/deberta_pytorch2onnx.py +++ b/demo/DeBERTa/deberta_pytorch2onnx.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/DeBERTa/deberta_tensorrt_inference.py b/demo/DeBERTa/deberta_tensorrt_inference.py index 378a5953..355ad7cf 100644 --- a/demo/DeBERTa/deberta_tensorrt_inference.py +++ b/demo/DeBERTa/deberta_tensorrt_inference.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/DeBERTa/requirements.txt b/demo/DeBERTa/requirements.txt index 59b63433..c52dd08a 100644 --- a/demo/DeBERTa/requirements.txt +++ b/demo/DeBERTa/requirements.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/Diffusion/README.md b/demo/Diffusion/README.md index d550c83b..42949381 100644 --- a/demo/Diffusion/README.md +++ b/demo/Diffusion/README.md @@ -19,15 +19,15 @@ Install nvidia-docker using [these intructions](https://docs.nvidia.com/datacent docker run --rm -it --gpus all -v $PWD:/workspace nvcr.io/nvidia/pytorch:24.01-py3 /bin/bash ``` +NOTE: The demo supports CUDA>=11.8 + ### Install latest TensorRT release ```bash python3 -m pip install --upgrade pip -python3 -m pip install --pre --upgrade --extra-index-url https://pypi.nvidia.com tensorrt +pip install --pre tensorrt-cu12 ``` -> NOTE: TensorRT 10.x is only available as a pre-release - Check your installed version using: `python3 -c 'import tensorrt;print(tensorrt.__version__)'` @@ -39,27 +39,24 @@ Check your installed version using: export TRT_OSSPATH=/workspace cd $TRT_OSSPATH/demo/Diffusion pip3 install -r requirements.txt - ``` -> NOTE: demoDiffusion has been tested on systems with NVIDIA A100, RTX3090, and RTX4090 GPUs, and the following software configuration. +> NOTE: demoDiffusion has been tested on systems with NVIDIA H100, A100, L40, T4, and RTX4090 GPUs, and the following software configuration. ``` diffusers 0.26.3 onnx 1.15.0 -onnx-graphsurgeon 0.3.27 -onnxruntime 1.17.0 -polygraphy 0.49.7 -tensorrt 10.0.0.6 +onnx-graphsurgeon 0.5.2 +onnxruntime 1.16.3 +polygraphy 0.49.9 +tensorrt 10.0.1.6 tokenizers 0.13.3 -torch 2.1.0 -transformers 4.31.0 +torch 2.2.0 +transformers 4.33.1 controlnet-aux 0.0.6 -nvidia-ammo 0.7.0 +nvidia-ammo 0.9.4 ``` - > NOTE: optionally install HuggingFace [accelerate](https://pypi.org/project/accelerate/) package for faster and less memory-intense model loading. 
- # Running demoDiffusion ### Review usage instructions for the supported pipelines @@ -75,6 +72,7 @@ python3 demo_txt2img_xl.py --help ### HuggingFace user access token To download model checkpoints for the Stable Diffusion pipelines, obtain a `read` access token to HuggingFace Hub. See [instructions](https://huggingface.co/docs/hub/security-tokens). +> NOTE: This step isn't required for many models now. ```bash export HF_TOKEN= @@ -144,10 +142,9 @@ python3 demo_txt2img_xl.py "Picture of a rustic Italian village with Olive trees ### Faster Text-to-image using SDXL & INT8 quantization using AMMO ```bash -python3 demo_txt2img_xl.py "a photo of an astronaut riding a horse on mars" --version xl-1.0 --onnx-dir onnx-sdxl --engine-dir engine-sdxl --int8 --quantization-level 3 +python3 demo_txt2img_xl.py "a photo of an astronaut riding a horse on mars" --version xl-1.0 --onnx-dir onnx-sdxl --engine-dir engine-sdxl --int8 ``` - -Note that the calibration process can be quite time-consuming, and will be repeated if `--quantization-level`, `--denoising-steps`, or `--onnx-dir` is changed. +> Note that INT8 quantization is only supported for SDXL, and won't work with LoRA weights. Some prompts may produce better inputs with fewer denoising steps (e.g. `--denoising-steps 20`) but this will repeat the calibration, ONNX export, and engine building processes for the U-Net. ### Faster Text-to-Image using SDXL + LCM (Latent Consistency Model) LoRA weights [LCM-LoRA](https://arxiv.org/abs/2311.05556) produces good quality images in 4 to 8 denoising steps instead of 30+ needed base model. Note that we use LCM scheduler and disable classifier-free-guidance by setting `--guidance-scale` to 0. diff --git a/demo/Diffusion/calibration.py b/demo/Diffusion/calibration.py deleted file mode 100644 index 98adb6d3..00000000 --- a/demo/Diffusion/calibration.py +++ /dev/null @@ -1,177 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import types -from typing import Callable, Optional, Union - -import numpy as np -import torch -import torch.distributed as dist -import torch.nn as nn -from torch.distributed import ReduceOp -from utilities import PercentileAmaxes - -from ammo.torch.quantization.model_calib import ( - enable_stats_collection, - finish_stats_collection, - max_calibrate, -) -from ammo.torch.quantization.utils import is_quantized_linear - - -def precentile_calib_mode(base_unet, quant_config={}): - def compute_amax(self, all_reduce=True): - """Return the absolute max of all tensors collected.""" - if ( - self._calib_amax is not None - and all_reduce - and dist.is_available() - and dist.is_initialized() - and dist.get_world_size() > 1 - ): - tmp_amax = self._calib_amax.clone() - dist.all_reduce(tmp_amax, op=ReduceOp.MAX) - self._calib_amax.copy_(tmp_amax) - if self._track_amax: - up_lim = int(self._amaxs.total_step * self._amaxs.percentile) - if up_lim <= 0: - up_lim = 1 - amaxs_values = [self._amaxs.data[i] for i in range(0, up_lim)] - act_amax = ( - torch.tensor(np.vstack(amaxs_values).min(axis=0)) - .float() - .squeeze(0) - .to(self._calib_amax.device) - .to(self._calib_amax.dtype) - ) - return act_amax - return self._calib_amax - - for _, module in base_unet.named_modules(): - if isinstance(module, (nn.Linear, nn.Conv2d)): - module.input_quantizer._calibrator._track_amax = 
True - module.input_quantizer._calibrator._amaxs = PercentileAmaxes( - total_step=quant_config["base-step"], percentile=quant_config["percentile"] - ) - module.input_quantizer._calibrator.compute_amax = types.MethodType( - compute_amax, module.input_quantizer._calibrator - ) - - -@torch.no_grad() -def smoothquant(model, forward_loop=None): - """ - Rewrite the original SmoothQuant method - """ - assert forward_loop is not None, "forward_loop must be provided for smoothquant" - max_calibrate(model, forward_loop) - - smoothed_modules = 0 - for name, module in model.named_modules(): - if is_quantized_linear(module): - if not hasattr(module.input_quantizer, "_amax"): - print(f"Warning: {name} is not calibrated, skip smoothing") - continue - if module.input_quantizer.num_bits != 8 or module.weight_quantizer.num_bits != 8: - print(f"Warning: only int8 smoothing is supported, skip {name}") - continue - if module.input_quantizer.axis != -1: - print(f"Warning: only per-channel smoothing is supported, skip {name}") - continue - - alpha = 1.0 - if hasattr(module, "alpha"): - alpha = module.alpha - assert ( - module.input_quantizer._amax.numel() > 1 - ), f"Error: {name} has only one channel to smooth" - - # It is important to keep scaling math in fp32 to be numerically safe - act_amax = module.input_quantizer.amax.float() - - act_device = act_amax.device - - # If model is split across devices, this tensor may be on wrong one - act_amax = act_amax.to(module.weight.device) - - weight_scale = module.weight.abs().max(dim=0, keepdim=True)[0] - scale_a = (weight_scale.pow(1 - alpha) / act_amax.pow(alpha)).squeeze() - - # Some channel could have 0 amax which causes scale_a to overflow. 
Explicitly mask them out here - epsilon = 1.0 / (1 << 31) - if act_amax.min() <= epsilon: - zero_mask = act_amax <= epsilon - scale_a[zero_mask] = 1 - inv_scale_a = 1.0 / scale_a - inv_scale_a = inv_scale_a.squeeze()[None, :] - - # Use per-tensor quantization for activation, add a pre-quantization scale vector - module.input_quantizer.pre_quant_scale = scale_a.to(module.weight.dtype).to(act_device) - module.input_quantizer._axis = None - delattr(module.input_quantizer, "_amax") - module.input_quantizer.amax = torch.tensor( - (act_amax * scale_a).max().item(), - dtype=module.weight.dtype, - device=module.weight.device, - ) - - # Multiply weight by inv_scale_a and recalibrate - module.weight.detach().copy_( - (module.weight.float() * inv_scale_a).to(module.weight.dtype) - ) - - enable_stats_collection(module.weight_quantizer) - module.weight_quantizer(module.weight) - finish_stats_collection(module.weight_quantizer) - - smoothed_modules += 1 - print(f"Smoothed {smoothed_modules} modules") - - -def calibrate( - model: nn.Module, - algorithm: Union[str, dict, None] = "max", - forward_loop: Optional[Callable] = None, -) -> None: - if algorithm is None: - return - - if isinstance(algorithm, str): - kwargs = {} - elif isinstance(algorithm, dict): - kwargs = algorithm.copy() - algorithm = kwargs.pop("method") - else: - raise TypeError(f"Unsupported type for algorithm: {type(algorithm)}") - - if algorithm == "smoothquant": - smoothquant(model, forward_loop) - elif algorithm == "max": - max_calibrate(model, forward_loop) - else: - raise ValueError(f"Unsupported calibration algorithm: {algorithm}") - - -def reg_alpha_qkv(base_unet, alpha): - """ - Only apply alpha to QKV layers - """ - for name, module in base_unet.named_modules(): - if isinstance(module, torch.nn.Linear): - if "to_q" in name or "to_k" in name or "to_v" in name: - module.alpha = alpha - diff --git a/demo/Diffusion/demo_img2img.py b/demo/Diffusion/demo_img2img.py index bf56f6a9..74ec90ad 100755 --- 
a/demo/Diffusion/demo_img2img.py +++ b/demo/Diffusion/demo_img2img.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/Diffusion/demo_inpaint.py b/demo/Diffusion/demo_inpaint.py index af635df0..29ca0ce2 100755 --- a/demo/Diffusion/demo_inpaint.py +++ b/demo/Diffusion/demo_inpaint.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/Diffusion/demo_txt2img.py b/demo/Diffusion/demo_txt2img.py index 3e33838f..84c9e164 100644 --- a/demo/Diffusion/demo_txt2img.py +++ b/demo/Diffusion/demo_txt2img.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/Diffusion/demo_txt2img_xl.py b/demo/Diffusion/demo_txt2img_xl.py index ea579279..96910756 100644 --- a/demo/Diffusion/demo_txt2img_xl.py +++ b/demo/Diffusion/demo_txt2img_xl.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/Diffusion/models.py b/demo/Diffusion/models.py index b1a196aa..b48028ff 100644 --- a/demo/Diffusion/models.py +++ b/demo/Diffusion/models.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,7 +22,6 @@ ControlNetModel, UNet2DConditionModel ) -from diffusers.utils import convert_state_dict_to_diffusers import json import numpy as np import onnx @@ -159,13 +158,13 @@ def fuse_mha_qkv_int8_sq(self): del tensors[k] removed += 1 print(f"Removed {removed} QDQ nodes") - return removed + return removed # expected 72 for L2.5 def get_path(version, pipeline, controlnets=None): if controlnets is not None: return ["lllyasviel/sd-controlnet-" + modality for modality in controlnets] - + if version == "1.4": if pipeline.is_inpaint(): return "runwayml/stable-diffusion-inpainting" @@ -647,7 +646,7 @@ def __init__(self, unet, controlnets) -> None: super().__init__() self.unet = unet self.controlnets = controlnets - + def forward(self, sample, timestep, encoder_hidden_states, images, controlnet_scales): for i, (image, conditioning_scale, controlnet) in enumerate(zip(images, controlnet_scales, self.controlnets)): down_samples, mid_sample = controlnet( @@ -663,7 +662,7 @@ def forward(self, sample, timestep, encoder_hidden_states, images, controlnet_sc for down_sample in down_samples ] mid_sample *= conditioning_scale - + # merge samples if i == 0: down_block_res_samples, mid_block_res_sample = down_samples, mid_sample @@ -673,7 +672,7 @@ def forward(self, sample, timestep, encoder_hidden_states, images, controlnet_sc for samples_prev, samples_curr in zip(down_block_res_samples, 
down_samples) ] mid_block_res_sample += mid_sample - + noise_pred = self.unet( sample, timestep, @@ -744,7 +743,7 @@ def get_model(self, torch_inference=''): def get_input_names(self): if self.controlnets is None: return ['sample', 'timestep', 'encoder_hidden_states'] - else: + else: return ['sample', 'timestep', 'encoder_hidden_states', 'images', 'controlnet_scales'] def get_output_names(self): @@ -820,14 +819,14 @@ def get_sample_input(self, batch_size, image_height, image_width, static_shape): dtype = torch.float16 if self.fp16 else torch.float32 if self.controlnets is None: return ( - torch.randn(batch_size, self.unet_dim, latent_height, latent_width, dtype=torch.float32, device=self.device), - torch.tensor([1.], dtype=torch.float32, device=self.device), + torch.randn(batch_size, self.unet_dim, latent_height, latent_width, dtype=dtype, device=self.device), + torch.tensor([1.], dtype=dtype, device=self.device), torch.randn(batch_size, self.text_maxlen, self.embedding_dim, dtype=dtype, device=self.device) ) else: return ( - torch.randn(batch_size, self.unet_dim, latent_height, latent_width, dtype=torch.float32, device=self.device), - torch.tensor(999, dtype=torch.float32, device=self.device), + torch.randn(batch_size, self.unet_dim, latent_height, latent_width, dtype=dtype, device=self.device), + torch.tensor(999, dtype=dtype, device=self.device), torch.randn(batch_size, self.text_maxlen, self.embedding_dim, dtype=dtype, device=self.device), torch.randn(len(self.controlnets), batch_size, 3, image_height, image_width, dtype=dtype, device=self.device), torch.randn(len(self.controlnets), dtype=dtype, device=self.device) @@ -931,8 +930,8 @@ def get_sample_input(self, batch_size, image_height, image_width, static_shape): latent_height, latent_width = self.check_dims(batch_size, image_height, image_width) dtype = torch.float16 if self.fp16 else torch.float32 return ( - torch.randn(self.xB*batch_size, self.unet_dim, latent_height, latent_width, dtype=torch.float32, 
device=self.device), - torch.tensor([1.], dtype=torch.float32, device=self.device), + torch.randn(self.xB*batch_size, self.unet_dim, latent_height, latent_width, dtype=dtype, device=self.device), + torch.tensor([1.], dtype=dtype, device=self.device), torch.randn(self.xB*batch_size, self.text_maxlen, self.embedding_dim, dtype=dtype, device=self.device), { 'added_cond_kwargs': { diff --git a/demo/Diffusion/requirements.txt b/demo/Diffusion/requirements.txt index 4de26381..5fa939ec 100644 --- a/demo/Diffusion/requirements.txt +++ b/demo/Diffusion/requirements.txt @@ -1,4 +1,3 @@ -accelerate colored controlnet_aux==0.0.6 cuda-python @@ -7,11 +6,10 @@ ftfy matplotlib nvtx onnx==1.15.0 -onnxruntime==1.17.0 +onnxruntime==1.16.3 opencv-python==4.8.0.74 scipy -transformers==4.31.0 ---extra-index-url https://pypi.nvidia.com -nvidia-ammo==0.7.0 +transformers==4.33.1 +nvidia-ammo==0.9.4 onnx-graphsurgeon -polygraphy +polygraphy==0.49.9 diff --git a/demo/Diffusion/stable_diffusion_pipeline.py b/demo/Diffusion/stable_diffusion_pipeline.py index 13bd4156..10b7f57e 100755 --- a/demo/Diffusion/stable_diffusion_pipeline.py +++ b/demo/Diffusion/stable_diffusion_pipeline.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,8 +15,8 @@ # limitations under the License. 
# +import ammo.torch.opt as ato import ammo.torch.quantization as atq -import calibration from cuda import cudart from diffusers import ( DDIMScheduler, @@ -44,7 +44,6 @@ import numpy as np import nvtx import json -import onnx import os import pathlib import tensorrt as trt @@ -55,17 +54,18 @@ PIPELINE_TYPE, TRT_LOGGER, Engine, - filter_func, - get_smoothquant_config, get_refit_weights, load_calib_prompts, merge_loras, prepare_mask_and_masked_image, - quantize_lvl, - replace_lora_layers, save_image, unload_model ) +from utils_ammo import ( + filter_func, + quantize_lvl, + get_int8_config, +) class StableDiffusionPipeline: """ @@ -76,7 +76,7 @@ def __init__( version='1.5', pipeline_type=PIPELINE_TYPE.TXT2IMG, max_batch_size=16, - denoising_steps=50, + denoising_steps=30, scheduler=None, guidance_scale=7.5, device='cuda', @@ -216,6 +216,11 @@ def makeScheduler(cls, subfolder="scheduler", **kwargs): if self.pipeline_type.is_sd_xl(): self.config['clip_hidden_states'] = True self.torch_inference = torch_inference + if self.torch_inference: + torch._inductor.config.conv_1x1_as_mm = True + torch._inductor.config.coordinate_descent_tuning = True + torch._inductor.config.epilogue_fusion = False + torch._inductor.config.coordinate_descent_check_all_directions = True self.use_cuda_graph = use_cuda_graph # initialized in loadEngines() @@ -315,10 +320,11 @@ def loadEngines( timing_cache=None, int8=False, quantization_level=2.5, - quantization_percentile=0.4, - quantization_alpha=0.6, - calibration_steps=384, - denoising_steps=50, + quantization_percentile=1.0, + quantization_alpha=0.8, + calibration_size=32, + calib_batch_size=2, + denoising_steps=30, ): """ Build and load engines for TensorRT accelerated inference. @@ -349,6 +355,24 @@ def loadEngines( Enable all tactic sources during TensorRT engine builds. timing_cache (str): Path to the timing cache to speed up TensorRT build. + int8 (bool): + Whether to quantize to int8 format or not (SDXL only). 
+ quantization_level (float): + Controls which layers to quantize. 1: CNN, 2: CNN+FFN, 2.5: CNN+FFN+QKV, 3: CNN+FC + quantization_percentile (float): + Control quantization scaling factors (amax) collecting range, where the minimum amax in + range(n_steps * percentile) will be collected. Recommendation: 1.0 + quantization_alpha (float): + The alpha parameter for SmoothQuant quantization used for linear layers. + Recommendation: 0.8 for SDXL + calibration_size (int): + The number of steps to use for calibrating the model for quantization. + Recommendation: 32, 64, 128 for SDXL + calib_batch_size (int): + The batch size to use for calibration. Defaults to 2. + denoising_steps (int): + The number of denoising steps. + More denoising steps usually lead to a higher quality image at the expense of slower inference. """ # Create directories if missing for directory in [engine_dir, onnx_dir]: @@ -411,7 +435,7 @@ def loadEngines( if int8: assert self.pipeline_type.is_sd_xl(), "int8 quantization only supported for SDXL pipeline" use_int8['unetxl'] = True - model_suffix['unetxl'] += f"-int8.l{quantization_level}.bs2.s{denoising_steps}.c{calibration_steps}.p{quantization_percentile}.a{quantization_alpha}" + model_suffix['unetxl'] += f"-int8.l{quantization_level}.bs2.s{denoising_steps}.c{calibration_size}.p{quantization_percentile}.a{quantization_alpha}" onnx_path = dict(zip(model_names, [self.getOnnxPath(model_name, onnx_dir, opt=False, suffix=model_suffix[model_name]) for model_name in model_names])) onnx_opt_path = dict(zip(model_names, [self.getOnnxPath(model_name, onnx_dir, suffix=model_suffix[model_name]) for model_name in model_names])) engine_path = dict(zip(model_names, [self.getEnginePath(model_name, engine_dir, do_engine_refit[model_name], suffix=model_suffix[model_name]) for model_name in model_names])) @@ -433,22 +457,16 @@ def loadEngines( print(f"[I] Calibrated weights not found, generating {state_dict_path}") pipeline = obj.get_pipeline() model = pipeline.unet - 
replace_lora_layers(model) calibration_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'calibration-prompts.txt') - # Use batch_size = 2 for UNet calibration - calibration_prompts = load_calib_prompts(2, calibration_file) - # TODO check size > calibration_steps - quant_config = get_smoothquant_config(model, quantization_level) - if quantization_percentile is not None: - quant_config["percentile"] = quantization_percentile - quant_config["base-step"] = int(denoising_steps) - - atq.replace_quant_module(model) - atq.set_quantizer_by_cfg(model, quant_config["quant_cfg"]) - if quantization_percentile is not None: - calibration.precentile_calib_mode(base_unet=model, quant_config=quant_config) - if quantization_alpha is not None: - calibration.reg_alpha_qkv(base_unet=model, alpha=quantization_alpha) + calibration_prompts = load_calib_prompts(calib_batch_size, calibration_file) + # TODO check size > calibration_size + quant_config = get_int8_config( + model, + quantization_level, + quantization_alpha, + quantization_percentile, + denoising_steps + ) def do_calibrate(base, calibration_prompts, **kwargs): for i_th, prompts in enumerate(calibration_prompts): @@ -462,34 +480,35 @@ def do_calibrate(base, calibration_prompts, **kwargs): ] * len(prompts), ).images - - def calibration_loop(): + + def calibration_loop(unet): + pipeline.model = unet do_calibrate( base=pipeline, calibration_prompts=calibration_prompts, - calib_size=calibration_steps, + calib_size=calibration_size // calib_batch_size, n_steps=denoising_steps, ) - print(f"[I] Performing int8 calibration for {calibration_steps} steps. 
This can take a long time.") - calibration.calibrate(model, quant_config["algorithm"], forward_loop=calibration_loop) - torch.save(model.state_dict(), state_dict_path) + print(f"[I] Performing int8 calibration for {calibration_size} steps.") + atq.quantize(model, quant_config, forward_loop=calibration_loop) + ato.save(model, state_dict_path) - print(f"[I] Generaing quantized ONNX model: {onnx_opt_path[model_name]}") + print(f"[I] Generating quantized ONNX model: {onnx_opt_path[model_name]}") if not os.path.exists(onnx_path[model_name]): model = obj.get_model() - replace_lora_layers(model) - atq.replace_quant_module(model) - quant_config = atq.INT8_DEFAULT_CFG - atq.set_quantizer_by_cfg(model, quant_config["quant_cfg"]) - model.load_state_dict(torch.load(state_dict_path), strict=True) - quantize_lvl(model, quantization_level) + ato.restore(model, state_dict_path) + quantize_lvl(model, quantization_level) atq.disable_quantizer(model, filter_func) - model.to(torch.float32) # QDQ needs to be in FP32 + model.to(torch.float32).to("cpu") # QDQ needs to be in FP32 + # WAR to enable ONNX export of quantized UNet + obj.device="cpu" + obj.fp16=False else: model = None obj.export_onnx(onnx_path[model_name], onnx_opt_path[model_name], onnx_opset, opt_image_height, opt_image_width, custom_model=model) - + obj.fp16=True # Part of WAR, UNET obj.fp16 defaults to True so it is safe to reset this way + # FIXME do_export_weights_map needs ONNX graph if do_export_weights_map: print(f"[I] Saving weights map: {weights_map_path[model_name]}") diff --git a/demo/Diffusion/utilities.py b/demo/Diffusion/utilities.py index 62d582f5..11f36807 100644 --- a/demo/Diffusion/utilities.py +++ b/demo/Diffusion/utilities.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -25,7 +25,6 @@ import numpy as np import onnx from onnx import numpy_helper -import onnx_graphsurgeon as gs import os from PIL import Image from polygraphy.backend.common import bytes_from_path @@ -40,9 +39,7 @@ ) from polygraphy.logger import G_LOGGER import random -import re import requests -from scipy import integrate import tensorrt as trt import torch import types @@ -406,63 +403,6 @@ def load_calib_prompts(batch_size, calib_data_path): lst = [line.rstrip("\n") for line in file] return [lst[i : i + batch_size] for i in range(0, len(lst), batch_size)] -def filter_func(name): - pattern = re.compile( - r".*(time_emb_proj|time_embedding|conv_in|conv_out|conv_shortcut|add_embedding).*" - ) - return pattern.match(name) is not None - -def quantize_lvl(unet, quant_level=2.5): - """ - We should disable the unwanted quantizer when exporting the onnx - Because in the current ammo setting, it will load the quantizer amax for all the layers even - if we didn't add that unwanted layer into the config during the calibration - """ - for name, module in unet.named_modules(): - if isinstance(module, torch.nn.Conv2d): - module.input_quantizer.enable() - module.weight_quantizer.enable() - elif isinstance(module, torch.nn.Linear): - if ( - (quant_level >= 2 and "ff.net" in name) - or (quant_level >= 2.5 and ("to_q" in name or "to_k" in name or "to_v" in name)) - or quant_level == 3 - ): - module.input_quantizer.enable() - module.weight_quantizer.enable() - else: - module.input_quantizer.disable() - module.weight_quantizer.disable() - -def get_smoothquant_config(model, quant_level=3): - quant_config = { - "quant_cfg": {}, - "algorithm": "smoothquant", - } - for name, module in model.named_modules(): - w_name = f"{name}*weight_quantizer" - i_name = f"{name}*input_quantizer" - - if ( - w_name in quant_config["quant_cfg"].keys() # type: ignore - or i_name in 
quant_config["quant_cfg"].keys() # type: ignore - ): - continue - if filter_func(name): - continue - if isinstance(module, torch.nn.Linear): - if ( - (quant_level >= 2 and "ff.net" in name) - or (quant_level >= 2.5 and ("to_q" in name or "to_k" in name or "to_v" in name)) - or quant_level == 3 - ): - quant_config["quant_cfg"][w_name] = {"num_bits": 8, "axis": 0} # type: ignore - quant_config["quant_cfg"][i_name] = {"num_bits": 8, "axis": -1} # type: ignore - elif isinstance(module, torch.nn.Conv2d): - quant_config["quant_cfg"][w_name] = {"num_bits": 8, "axis": 0} # type: ignore - quant_config["quant_cfg"][i_name] = {"num_bits": 8, "axis": None} # type: ignore - return quant_config - class PercentileAmaxes: def __init__(self, total_step, percentile) -> None: self.data = {} @@ -503,7 +443,7 @@ def add_arguments(parser): # TensorRT engine build parser.add_argument('--engine-dir', default='engine', help="Output directory for TensorRT engines") parser.add_argument('--int8', action='store_true', help="Apply int8 quantization.") - parser.add_argument('--quantization-level', type=float, default=3.0, choices=range(1,4), help="int8/fp8 quantization level, 1: CNN, 2: CNN+FFN, 2.5: CNN+FFN+QKV, 3: CNN+FC") + parser.add_argument('--quantization-level', type=float, default=2.5, choices=[1.0, 2.0, 2.5, 3.0], help="int8/fp8 quantization level, 1: CNN, 2: CNN+FFN, 2.5: CNN+FFN+QKV, 3: CNN+FC") parser.add_argument('--build-static-batch', action='store_true', help="Build TensorRT engines with fixed batch size.") parser.add_argument('--build-dynamic-shape', action='store_true', help="Build TensorRT engines with dynamic image shapes.") parser.add_argument('--build-enable-refit', action='store_true', help="Enable Refit option in TensorRT engines during build.") diff --git a/demo/Diffusion/utils_ammo.py b/demo/Diffusion/utils_ammo.py new file mode 100644 index 00000000..8bfe44b8 --- /dev/null +++ b/demo/Diffusion/utils_ammo.py @@ -0,0 +1,160 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 
1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re +import torch + +from ammo.torch.quantization import utils as quant_utils +from ammo.torch.quantization.calib.max import MaxCalibrator + +from diffusers.models.lora import LoRACompatibleConv, LoRACompatibleLinear + + +class PercentileCalibrator(MaxCalibrator): + def __init__(self, num_bits=8, axis=None, unsigned=False, track_amax=False, **kwargs): + super().__init__(num_bits, axis, unsigned, track_amax) + self.percentile = kwargs["percentile"] + self.total_step = kwargs["total_step"] + self.global_min = kwargs["global_min"] + self.data = {} + self.i = 0 + + def collect(self, x): + """Tracks the absolute max of all tensors. + + Args: + x: A tensor + + Raises: + RuntimeError: If amax shape changes + """ + # Swap axis to reduce. + axis = self._axis if isinstance(self._axis, (list, tuple)) else [self._axis] + # Handle negative axis. 
+ axis = [x.dim() + i if isinstance(i, int) and i < 0 else i for i in axis] + reduce_axis = [] + for i in range(x.dim()): + if i not in axis: + reduce_axis.append(i) + local_amax = quant_utils.reduce_amax(x, axis=reduce_axis).detach() + _cur_step = self.i % self.total_step + if _cur_step not in self.data.keys(): + self.data[_cur_step] = local_amax + else: + if self.global_min: + self.data[_cur_step] = torch.min(self.data[_cur_step], local_amax) + else: + self.data[_cur_step] += local_amax + if self._track_amax: + raise NotImplementedError + self.i += 1 + + def compute_amax(self): + """Return the absolute max of all tensors collected.""" + up_lim = int(self.total_step * self.percentile) + amaxs_values = [self.data[i] / self.total_step for i in range(0, up_lim)] + act_amax = torch.vstack(amaxs_values).min(axis=0)[0] + self._calib_amax = act_amax + return self._calib_amax + + def __str__(self): + s = "PercentileCalibrator" + return s.format(**self.__dict__) + + def __repr__(self): + s = "PercentileCalibrator(" + s += super(MaxCalibrator, self).__repr__() + s += " calib_amax={_calib_amax}" + if self._track_amax: + s += " amaxs={_amaxs}" + s += ")" + return s.format(**self.__dict__) + +def filter_func(name): + pattern = re.compile( + r".*(time_emb_proj|time_embedding|conv_in|conv_out|conv_shortcut|add_embedding).*" + ) + return pattern.match(name) is not None + + +def quantize_lvl(unet, quant_level=2.5): + """ + We should disable the unwanted quantizer when exporting the onnx + Because in the current ammo setting, it will load the quantizer amax for all the layers even + if we didn't add that unwanted layer into the config during the calibration + """ + for name, module in unet.named_modules(): + if isinstance(module, (torch.nn.Conv2d, LoRACompatibleConv)): + module.input_quantizer.enable() + module.weight_quantizer.enable() + elif isinstance(module, (torch.nn.Linear, LoRACompatibleLinear)): + if ( + (quant_level >= 2 and "ff.net" in name) + or (quant_level >= 2.5 and 
("to_q" in name or "to_k" in name or "to_v" in name)) + or quant_level == 3 + ): + module.input_quantizer.enable() + module.weight_quantizer.enable() + else: + module.input_quantizer.disable() + module.weight_quantizer.disable() + +def get_int8_config( + model, quant_level=2.5, alpha=0.8, percentile=1.0, num_inference_steps=20, global_min=False +): + quant_config = { + "quant_cfg": { + "*lm_head*": {"enable": False}, + "*output_layer*": {"enable": False}, + "default": {"num_bits": 8, "axis": None}, + }, + "algorithm": {"method": "smoothquant", "alpha": alpha}, + } + for name, module in model.named_modules(): + w_name = f"{name}*weight_quantizer" + i_name = f"{name}*input_quantizer" + + if w_name in quant_config["quant_cfg"].keys() or i_name in quant_config["quant_cfg"].keys(): + continue + if filter_func(name): + continue + if isinstance(module, (torch.nn.Linear, LoRACompatibleLinear)): + if ( + (quant_level >= 2 and "ff.net" in name) + or (quant_level >= 2.5 and ("to_q" in name or "to_k" in name or "to_v" in name)) + or quant_level == 3 + ): + quant_config["quant_cfg"][w_name] = {"num_bits": 8, "axis": 0} + quant_config["quant_cfg"][i_name] = {"num_bits": 8, "axis": -1} + elif isinstance(module, (torch.nn.Conv2d, LoRACompatibleConv)): + quant_config["quant_cfg"][w_name] = {"num_bits": 8, "axis": 0} + quant_config["quant_cfg"][i_name] = { + "num_bits": 8, + "axis": None, + "calibrator": ( + PercentileCalibrator, + (), + { + "num_bits": 8, + "axis": None, + "percentile": percentile, + "total_step": num_inference_steps, + "global_min": global_min, + }, + ), + } + return quant_config diff --git a/demo/Jasper/README.md b/demo/Jasper/README.md deleted file mode 100644 index f8988c08..00000000 --- a/demo/Jasper/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Jasper Inference Using TensorRT - -[Jupyter Notebook](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/SpeechRecognition/Jasper/notebooks/) diff --git a/demo/Tacotron2/README.md 
b/demo/Tacotron2/README.md deleted file mode 100644 index c687c5ee..00000000 --- a/demo/Tacotron2/README.md +++ /dev/null @@ -1,113 +0,0 @@ -# Tacotron 2 and WaveGlow Inference with TensorRT - -The Tacotron2 and WaveGlow models form a text-to-speech (TTS) system that enables users to synthesize natural sounding speech from raw transcripts without any additional information such as patterns and/or rhythms of speech. This is an implementation of Tacotron2 for PyTorch, tested and maintained by NVIDIA, and provides scripts to perform high-performance inference using NVIDIA TensorRT. More information about the TTS system and its training can be found in the -[NVIDIA DeepLearningExamples](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2). - -NVIDIA TensorRT is a platform for high-performance deep learning inference. It includes a deep learning inference optimizer and runtime that delivers low latency and high-throughput for deep learning inference applications. After optimizing the compute-intensive acoustic model with NVIDIA TensorRT, inference throughput increased by up to 1.4x over native PyTorch in mixed precision. - -### Software Versions - -|Software|Version| -|--------|-------| -|Python|3.8.10| -|CUDA|12.2| -|Apex|0.1| -|TensorRT|9.0| -|PyTorch|2.0.1| - - -## Quick Start Guide - -1. Build and launch the container as described in [TensorRT OSS README](https://github.com/NVIDIA/TensorRT/blob/master/README.md). - - **Note:** After this point, all commands should be run from within the container. - -2. Verify TensorRT installation by printing the version: - ```bash - python3 -c "import tensorrt as trt; print(trt.__version__)" - ``` - -3. Install prerequisite software for TTS sample: - ```bash - cd $TRT_OSSPATH/demo/Tacotron2 - bash ./scripts/install_prerequisites.sh - ``` -4. 
Download pretrained checkpoints from [NGC](https://ngc.nvidia.com/catalog/models) into the `./checkpoints` directory: - -- [Tacotron2 checkpoint](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_fp16) -- [WaveGlow checkpoint](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_fp16) - - ```bash - bash ./scripts/download_checkpoints.sh - ``` - -5. Export the models to ONNX intermediate representation (ONNX IR). - Export Tacotron 2 to three ONNX parts: Encoder, Decoder, and Postnet: - - ```bash - mkdir -p output - python3 tensorrt/convert_tacotron22onnx.py --tacotron2 checkpoints/tacotron2_pyt_ckpt_amp_v19.09.0/nvidia_tacotron2pyt_fp16_20190427 -o output/ --fp16 - ``` - - Convert WaveGlow to ONNX IR: - - ```bash - python3 tensorrt/convert_waveglow2onnx.py --waveglow ./checkpoints/waveglow_ckpt_amp_256_v19.10.0/nvidia_waveglow256pyt_fp16 --config-file config.json --wn-channels 256 -o output/ --fp16 - ``` - - The above commands store the generated ONNX files under the `./output/` directory: - `encoder.onnx`, `decoder_iter.onnx`, `postnet.onnx`, `waveglow.onnx`, `loop_body_fp16.onnx`, and `decoder.onnx` (on TensorRT 8.0+ if `--no-loop` option is not specified). - -6. Export the ONNX IRs to TensorRT engines with fp16 mode enabled: - - ```bash - python3 tensorrt/convert_onnx2trt.py --encoder output/encoder.onnx --decoder output/decoder.onnx --postnet output/postnet.onnx --waveglow output/waveglow.onnx -o output/ --fp16 - ``` - - After running the command, there should be four new engine files in `./output/` directory: - `encoder_fp16.engine`, `decoder_with_outer_loop_fp16.engine`, `postnet_fp16.engine`, and `waveglow_fp16.engine`. On TensorRT <8.0 or if `--no-loop` option is specified, `decoder_iter_fp16.engine` is generated instead. - -7. 
Run TTS inference pipeline with fp16: - - - ```bash - python3 tensorrt/inference_trt.py -i phrases/phrase.txt --encoder output/encoder_fp16.engine --decoder output/decoder_with_outer_loop_fp16.engine --postnet output/postnet_fp16.engine --waveglow output/waveglow_fp16.engine -o output/ --fp16 - ``` - - On TensorRT <8.0 use `decoder_iter_fp16.engine` for the decoder instead. - -## Performance - -### Benchmarking - -The following section shows how to benchmark the TensorRT inference performance for our Tacotron2 + Waveglow TTS. - -#### TensorRT inference benchmark - -Before running the benchmark script, please download the checkpoints and build the TensorRT engines for the Tacotron2 and Waveglow models as prescribed in the [Quick Start Guide](#quick-start-guide) above. - -The inference benchmark is performed on a single GPU by the `inference_benchmark.sh` script, which runs 3 warm-up iterations then runs timed inference for 1000 iterations. - -```bash -bash scripts/inference_benchmark.sh -``` - -*Note*: For benchmarking we use WaveGlow with 256 residual channels, and Tacotron2 decoder with outer loop for TensorRT inference. - -### Results - -> Note: Results last updated for TensorRT 8.0.1.6 release. 
- -#### Inference performance: NVIDIA T4 (16GB) - -|Framework|Batch size|Input length|Precision|Avg latency (s)|Latency std (s)|Latency confidence interval 90% (s)|Latency confidence interval 95% (s)|Latency confidence interval 99% (s)|Throughput (samples/sec)|Speed-up PyT+TRT/TRT|Avg mels generated (81 mels=1 sec of speech)| Avg audio length (s)| Avg RTF| -|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:| -|PyT+TRT|1| 128| FP16| 0.1662 | 0.0036 | 0.1705 | 0.1717 | 0.1736 | 871,568 | 7.64 | 566 | 6.99 | 42.03 | -|PyT |1| 128| FP16| 1.27 | 0.07 | 1.36 | 1.38 | 1.44 | 121,184 | 1.00 | 601 | 7.42 | 5.84 | - -#### Inference performance: NVIDIA V100 (16GB) - -|Framework|Batch size|Input length|Precision|Avg latency (s)|Latency std (s)|Latency confidence interval 90% (s)|Latency confidence interval 95% (s)|Latency confidence interval 99% (s)|Throughput (samples/sec)|Speed-up PyT+TRT/TRT|Avg mels generated (81 mels=1 sec of speech)| Avg audio length (s)| Avg RTF| -|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:| -|PyT+TRT|1| 128| FP16| 0.1641 | 0.0046 | 0.1694 | 0.1707 | 0.1731 | 900,884 | 6.52 | 577 | 7.13 | 43.44 | -|PyT |1| 128| FP16| 1.07 | 0.06 | 1.14 | 1.17 | 1.23 | 144,668 | 1.00 | 602 | 7.42 | 6.95 | diff --git a/demo/Tacotron2/common/audio_processing.py b/demo/Tacotron2/common/audio_processing.py deleted file mode 100644 index 7b261cec..00000000 --- a/demo/Tacotron2/common/audio_processing.py +++ /dev/null @@ -1,110 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import torch -import numpy as np -from scipy.signal import get_window -import librosa.util as librosa_util - - -def window_sumsquare(window, n_frames, hop_length=200, win_length=800, - n_fft=800, dtype=np.float32, norm=None): - """ - # from librosa 0.6 - Compute the sum-square envelope of a window function at a given hop length. - - This is used to estimate modulation effects induced by windowing - observations in short-time fourier transforms. - - Parameters - ---------- - window : string, tuple, number, callable, or list-like - Window specification, as in `get_window` - - n_frames : int > 0 - The number of analysis frames - - hop_length : int > 0 - The number of samples to advance between frames - - win_length : [optional] - The length of the window function. By default, this matches `n_fft`. - - n_fft : int > 0 - The length of each analysis frame. 
- - dtype : np.dtype - The data type of the output - - Returns - ------- - wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))` - The sum-squared envelope of the window function - """ - if win_length is None: - win_length = n_fft - - n = n_fft + hop_length * (n_frames - 1) - x = np.zeros(n, dtype=dtype) - - # Compute the squared window at the desired length - win_sq = get_window(window, win_length, fftbins=True) - win_sq = librosa_util.normalize(win_sq, norm=norm)**2 - win_sq = librosa_util.pad_center(win_sq, size=n_fft) - - # Fill the envelope - for i in range(n_frames): - sample = i * hop_length - x[sample:min(n, sample + n_fft)] += win_sq[:max(0, min(n_fft, n - sample))] - return x - - -def griffin_lim(magnitudes, stft_fn, n_iters=30): - """ - PARAMS - ------ - magnitudes: spectrogram magnitudes - stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods - """ - - angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size()))) - angles = angles.astype(np.float32) - angles = torch.autograd.Variable(torch.from_numpy(angles)) - signal = stft_fn.inverse(magnitudes, angles).squeeze(1) - - for i in range(n_iters): - _, angles = stft_fn.transform(signal) - signal = stft_fn.inverse(magnitudes, angles).squeeze(1) - return signal - - -def dynamic_range_compression(x, C=1, clip_val=1e-5): - """ - PARAMS - ------ - C: compression factor - """ - return torch.log(torch.clamp(x, min=clip_val) * C) - - -def dynamic_range_decompression(x, C=1): - """ - PARAMS - ------ - C: compression factor used to compress - """ - return torch.exp(x) / C diff --git a/demo/Tacotron2/common/layers.py b/demo/Tacotron2/common/layers.py deleted file mode 100644 index cbeb4910..00000000 --- a/demo/Tacotron2/common/layers.py +++ /dev/null @@ -1,96 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import torch -from librosa.filters import mel as librosa_mel_fn -from common.audio_processing import dynamic_range_compression, dynamic_range_decompression -from common.stft import STFT - - -class LinearNorm(torch.nn.Module): - def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'): - super(LinearNorm, self).__init__() - self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias) - - torch.nn.init.xavier_uniform_( - self.linear_layer.weight, - gain=torch.nn.init.calculate_gain(w_init_gain)) - - def forward(self, x): - return self.linear_layer(x) - - -class ConvNorm(torch.nn.Module): - def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, - padding=None, dilation=1, bias=True, w_init_gain='linear'): - super(ConvNorm, self).__init__() - if padding is None: - assert(kernel_size % 2 == 1) - padding = int(dilation * (kernel_size - 1) / 2) - - self.conv = torch.nn.Conv1d(in_channels, out_channels, - kernel_size=kernel_size, stride=stride, - padding=padding, dilation=dilation, - bias=bias) - - torch.nn.init.xavier_uniform_( - self.conv.weight, - gain=torch.nn.init.calculate_gain(w_init_gain)) - - def forward(self, signal): - return self.conv(signal) - - -class TacotronSTFT(torch.nn.Module): - def __init__(self, filter_length=1024, hop_length=256, win_length=1024, - n_mel_channels=80, sampling_rate=22050, mel_fmin=0.0, - mel_fmax=8000.0): - 
super(TacotronSTFT, self).__init__() - self.n_mel_channels = n_mel_channels - self.sampling_rate = sampling_rate - self.stft_fn = STFT(filter_length, hop_length, win_length) - mel_basis = librosa_mel_fn( - sampling_rate, filter_length, n_mel_channels, mel_fmin, mel_fmax) - mel_basis = torch.from_numpy(mel_basis).float() - self.register_buffer('mel_basis', mel_basis) - - def spectral_normalize(self, magnitudes): - output = dynamic_range_compression(magnitudes) - return output - - def spectral_de_normalize(self, magnitudes): - output = dynamic_range_decompression(magnitudes) - return output - - def mel_spectrogram(self, y): - """Computes mel-spectrograms from a batch of waves - PARAMS - ------ - y: Variable(torch.FloatTensor) with shape (B, T) in range [-1, 1] - - RETURNS - ------- - mel_output: torch.FloatTensor of shape (B, n_mel_channels, T) - """ - assert(torch.min(y.data) >= -1) - assert(torch.max(y.data) <= 1) - - magnitudes, phases = self.stft_fn.transform(y) - magnitudes = magnitudes.data - mel_output = torch.matmul(self.mel_basis, magnitudes) - mel_output = self.spectral_normalize(mel_output) - return mel_output diff --git a/demo/Tacotron2/common/stft.py b/demo/Tacotron2/common/stft.py deleted file mode 100644 index 0341d60e..00000000 --- a/demo/Tacotron2/common/stft.py +++ /dev/null @@ -1,159 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -""" -BSD 3-Clause License - -Copyright (c) 2017, Prem Seetharaman -All rights reserved. - -* Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from this - software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-""" - -import torch -import numpy as np -import torch.nn.functional as F -from torch.autograd import Variable -from scipy.signal import get_window -from librosa.util import pad_center, tiny -from common.audio_processing import window_sumsquare - - -class STFT(torch.nn.Module): - """adapted from Prem Seetharaman's https://github.com/pseeth/pytorch-stft""" - def __init__(self, filter_length=800, hop_length=200, win_length=800, - window='hann'): - super(STFT, self).__init__() - self.filter_length = filter_length - self.hop_length = hop_length - self.win_length = win_length - self.window = window - self.forward_transform = None - scale = self.filter_length / self.hop_length - fourier_basis = np.fft.fft(np.eye(self.filter_length)) - - cutoff = int((self.filter_length / 2 + 1)) - fourier_basis = np.vstack([np.real(fourier_basis[:cutoff, :]), - np.imag(fourier_basis[:cutoff, :])]) - - forward_basis = torch.FloatTensor(fourier_basis[:, None, :]) - inverse_basis = torch.FloatTensor( - np.linalg.pinv(scale * fourier_basis).T[:, None, :].astype(np.float32)) - - if window is not None: - assert(filter_length >= win_length) - # get window and zero center pad it to filter_length - fft_window = get_window(window, win_length, fftbins=True) - fft_window = pad_center(fft_window, size=filter_length) - fft_window = torch.from_numpy(fft_window).float() - - # window the bases - forward_basis *= fft_window - inverse_basis *= fft_window - - self.register_buffer('forward_basis', forward_basis.float()) - self.register_buffer('inverse_basis', inverse_basis.float()) - - def transform(self, input_data): - num_batches = input_data.size(0) - num_samples = input_data.size(1) - - self.num_samples = num_samples - - # similar to librosa, reflect-pad the input - input_data = input_data.view(num_batches, 1, num_samples) - input_data = F.pad( - input_data.unsqueeze(1), - (int(self.filter_length / 2), int(self.filter_length / 2), 0, 0), - mode='reflect') - input_data = input_data.squeeze(1) - - 
forward_transform = F.conv1d( - input_data, - Variable(self.forward_basis, requires_grad=False), - stride=self.hop_length, - padding=0) - - cutoff = int((self.filter_length / 2) + 1) - real_part = forward_transform[:, :cutoff, :] - imag_part = forward_transform[:, cutoff:, :] - - magnitude = torch.sqrt(real_part**2 + imag_part**2) - phase = torch.autograd.Variable( - torch.atan2(imag_part.data, real_part.data)) - - return magnitude, phase - - def inverse(self, magnitude, phase): - recombine_magnitude_phase = torch.cat( - [magnitude*torch.cos(phase), magnitude*torch.sin(phase)], dim=1) - - inverse_transform = F.conv_transpose2d( - recombine_magnitude_phase.unsqueeze(-1), - Variable(self.inverse_basis.unsqueeze(-1), requires_grad=False), - stride=(self.hop_length,1), - padding=(0,0)) - inverse_transform = inverse_transform.squeeze(-1) - - if self.window is not None: - window_sum = window_sumsquare( - self.window, magnitude.size(-1), hop_length=self.hop_length, - win_length=self.win_length, n_fft=self.filter_length, - dtype=np.float32) - # remove modulation effects - approx_nonzero_indices = torch.from_numpy( - np.where(window_sum > tiny(window_sum))[0]) - window_sum = torch.autograd.Variable( - torch.from_numpy(window_sum), requires_grad=False) - window_sum = window_sum.cuda() if magnitude.is_cuda else window_sum - inverse_transform[:, :, approx_nonzero_indices] /= window_sum[approx_nonzero_indices] - - # scale by hop ratio - inverse_transform *= float(self.filter_length) / self.hop_length - - inverse_transform = inverse_transform[:, :, int(self.filter_length/2):] - inverse_transform = inverse_transform[:, :, :-int(self.filter_length/2):] - - return inverse_transform - - def forward(self, input_data): - self.magnitude, self.phase = self.transform(input_data) - reconstruction = self.inverse(self.magnitude, self.phase) - return reconstruction diff --git a/demo/Tacotron2/common/utils.py b/demo/Tacotron2/common/utils.py deleted file mode 100644 index 6cccbf22..00000000 
--- a/demo/Tacotron2/common/utils.py +++ /dev/null @@ -1,72 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -from scipy.io.wavfile import read -import torch -import os - -import argparse -import json - -class ParseFromConfigFile(argparse.Action): - - def __init__(self, option_strings, type, dest, help=None, required=False): - super(ParseFromConfigFile, self).__init__(option_strings=option_strings, type=type, dest=dest, help=help, required=required) - - def __call__(self, parser, namespace, values, option_string): - with open(values, 'r') as f: - data = json.load(f) - - for group in data.keys(): - for k,v in data[group].items(): - underscore_k = k.replace('-', '_') - setattr(namespace, underscore_k, v) - -def get_mask_from_lengths(lengths): - max_len = torch.max(lengths).item() - ids = torch.arange(0, max_len, device=lengths.device, dtype=lengths.dtype) - mask = (ids < lengths.unsqueeze(1)).byte() - mask = torch.le(mask, 0) - return mask - - -def load_wav_to_torch(full_path): - sampling_rate, data = read(full_path) - return torch.FloatTensor(data.astype(np.float32)), sampling_rate - - -def load_filepaths_and_text(dataset_path, filename, split="|"): - with open(filename, encoding='utf-8') as f: - def split_line(root, line): - parts = line.strip().split(split) - if len(parts) > 2: - raise 
Exception( - "incorrect line format for file: {}".format(filename)) - path = os.path.join(root, parts[0]) - text = parts[1] - return path,text - filepaths_and_text = [split_line(dataset_path, line) for line in f] - return filepaths_and_text - - -def to_gpu(x): - x = x.contiguous() - - if torch.cuda.is_available(): - x = x.cuda(non_blocking=True) - return x diff --git a/demo/Tacotron2/config.json b/demo/Tacotron2/config.json deleted file mode 100644 index 07ab289e..00000000 --- a/demo/Tacotron2/config.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "audio": { - "max-wav-value": 32768.0, - "sampling-rate": 22050, - "filter-length": 1024, - "hop-length": 256, - "win-length": 1024, - "mel-fmin": 0.0, - "mel-fmax": 7000.0 - } -} diff --git a/demo/Tacotron2/data_functions.py b/demo/Tacotron2/data_functions.py deleted file mode 100644 index 623e5af6..00000000 --- a/demo/Tacotron2/data_functions.py +++ /dev/null @@ -1,58 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import torch -from tacotron2.data_function import TextMelCollate -from tacotron2.data_function import TextMelLoader -from waveglow.data_function import MelAudioLoader -from tacotron2.data_function import batch_to_gpu as batch_to_gpu_tacotron2 -from waveglow.data_function import batch_to_gpu as batch_to_gpu_waveglow - - -def get_collate_function(model_name, n_frames_per_step): - if model_name == 'Tacotron2': - collate_fn = TextMelCollate(n_frames_per_step) - elif model_name == 'WaveGlow': - collate_fn = torch.utils.data.dataloader.default_collate - else: - raise NotImplementedError( - "unknown collate function requested: {}".format(model_name)) - - return collate_fn - - -def get_data_loader(model_name, dataset_path, audiopaths_and_text, args): - if model_name == 'Tacotron2': - data_loader = TextMelLoader(dataset_path, audiopaths_and_text, args) - elif model_name == 'WaveGlow': - data_loader = MelAudioLoader(dataset_path, audiopaths_and_text, args) - else: - raise NotImplementedError( - "unknown data loader requested: {}".format(model_name)) - - return data_loader - - -def get_batch_to_gpu(model_name): - if model_name == 'Tacotron2': - batch_to_gpu = batch_to_gpu_tacotron2 - elif model_name == 'WaveGlow': - batch_to_gpu = batch_to_gpu_waveglow - else: - raise NotImplementedError( - "unknown batch_to_gpu requested: {}".format(model_name)) - return batch_to_gpu diff --git a/demo/Tacotron2/inference.py b/demo/Tacotron2/inference.py deleted file mode 100644 index 77bbccc1..00000000 --- a/demo/Tacotron2/inference.py +++ /dev/null @@ -1,266 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from tacotron2.text import text_to_sequence -import models -import torch -import argparse -import numpy as np -from scipy.io.wavfile import write -import matplotlib -import matplotlib.pyplot as plt - -import sys - -import time -import dllogger as DLLogger -from dllogger import StdOutBackend, JSONStreamBackend, Verbosity - -from waveglow.denoiser import Denoiser - -def parse_args(parser): - """ - Parse commandline arguments. - """ - parser.add_argument('-i', '--input', type=str, required=True, - help='Full path to the input text (phareses separated by new line)') - parser.add_argument('-o', '--output', required=True, - help='Output folder to save audio (file per phrase)') - parser.add_argument('--suffix', type=str, default="", - help="Output filename suffix") - parser.add_argument('--tacotron2', type=str, - help='Full path to the Tacotron2 model checkpoint file') - parser.add_argument('--waveglow', type=str, - help='Full path to the WaveGlow model checkpoint file') - parser.add_argument('-s', '--sigma-infer', default=0.9, type=float, - help='Standard deviation of the Gaussian distribution') - parser.add_argument('-d', '--denoising-strength', default=0.01, type=float, - help='Denoising strength for removing model bias') - parser.add_argument('-sr', '--sampling-rate', default=22050, type=int, - help='Sampling rate') - - run_mode = parser.add_mutually_exclusive_group() - run_mode.add_argument('--fp16', action='store_true', - help='Run inference with mixed precision') - run_mode.add_argument('--cpu', action='store_true', - help='Run inference on CPU') - - 
parser.add_argument('--log-file', type=str, default='nvlog.json', - help='Filename for logging') - parser.add_argument('--include-warmup', action='store_true', - help='Include warmup') - parser.add_argument('--stft-hop-length', type=int, default=256, - help='STFT hop length for estimating audio length from mel size') - - return parser - - -def checkpoint_from_distributed(state_dict): - """ - Checks whether checkpoint was generated by DistributedDataParallel. DDP - wraps model in additional "module.", it needs to be unwrapped for single - GPU inference. - :param state_dict: model's state dict - """ - ret = False - for key, _ in state_dict.items(): - if key.find('module.') != -1: - ret = True - break - return ret - - -def unwrap_distributed(state_dict): - """ - Unwraps model from DistributedDataParallel. - DDP wraps model in additional "module.", it needs to be removed for single - GPU inference. - :param state_dict: model's state dict - """ - new_state_dict = {} - for key, value in state_dict.items(): - new_key = key.replace('module.', '') - new_state_dict[new_key] = value - return new_state_dict - - -def load_and_setup_model(model_name, parser, checkpoint, fp16_run, cpu_run, forward_is_infer=False): - model_parser = models.parse_model_args(model_name, parser, add_help=False) - model_args, _ = model_parser.parse_known_args() - - model_config = models.get_model_config(model_name, model_args) - model = models.get_model(model_name, model_config, to_cuda=(not cpu_run), - forward_is_infer=forward_is_infer) - - if checkpoint is not None: - if cpu_run: - state_dict = torch.load(checkpoint, map_location=torch.device('cpu'))['state_dict'] - else: - state_dict = torch.load(checkpoint)['state_dict'] - if checkpoint_from_distributed(state_dict): - state_dict = unwrap_distributed(state_dict) - - model.load_state_dict(state_dict) - - if model_name == "WaveGlow": - model = model.remove_weightnorm(model) - - model.eval() - - if fp16_run: - model.half() - - return model - - -# taken 
from tacotron2/data_function.py:TextMelCollate.__call__ -def pad_sequences(batch): - # Right zero-pad all one-hot text sequences to max input length - input_lengths, ids_sorted_decreasing = torch.sort( - torch.LongTensor([len(x) for x in batch]), - dim=0, descending=True) - max_input_len = input_lengths[0] - - text_padded = torch.LongTensor(len(batch), max_input_len) - text_padded.zero_() - for i in range(len(ids_sorted_decreasing)): - text = batch[ids_sorted_decreasing[i]] - text_padded[i, :text.size(0)] = text - - return text_padded, input_lengths - - -def prepare_input_sequence(texts, cpu_run=False): - - d = [] - for i,text in enumerate(texts): - d.append(torch.IntTensor( - text_to_sequence(text, ['english_cleaners'])[:])) - - text_padded, input_lengths = pad_sequences(d) - if not cpu_run: - text_padded = text_padded.cuda().long() - input_lengths = input_lengths.cuda().long() - else: - text_padded = text_padded.long() - input_lengths = input_lengths.long() - - return text_padded, input_lengths - - -class MeasureTime(): - def __init__(self, measurements, key, cpu_run=False): - self.measurements = measurements - self.key = key - self.cpu_run = cpu_run - - def __enter__(self): - if not self.cpu_run: - torch.cuda.synchronize() - self.t0 = time.perf_counter() - - def __exit__(self, exc_type, exc_value, exc_traceback): - if not self.cpu_run: - torch.cuda.synchronize() - self.measurements[self.key] = time.perf_counter() - self.t0 - - -def main(): - """ - Launches text to speech (inference). - Inference is executed on a single GPU or CPU. 
- """ - parser = argparse.ArgumentParser( - description='PyTorch Tacotron 2 Inference') - parser = parse_args(parser) - args, _ = parser.parse_known_args() - - DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, - args.output+'/'+args.log_file), - StdOutBackend(Verbosity.VERBOSE)]) - for k,v in vars(args).items(): - DLLogger.log(step="PARAMETER", data={k:v}) - DLLogger.log(step="PARAMETER", data={'model_name':'Tacotron2_PyT'}) - - tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2, - args.fp16, args.cpu, forward_is_infer=True) - waveglow = load_and_setup_model('WaveGlow', parser, args.waveglow, - args.fp16, args.cpu, forward_is_infer=True) - denoiser = Denoiser(waveglow) - if not args.cpu: - denoiser.cuda() - - jitted_tacotron2 = torch.jit.script(tacotron2) - - texts = [] - try: - f = open(args.input, 'r') - texts = f.readlines() - except: - print("Could not read file") - sys.exit(1) - - if args.include_warmup: - sequence = torch.randint(low=0, high=148, size=(1,50)).long() - input_lengths = torch.IntTensor([sequence.size(1)]).long() - if not args.cpu: - sequence = sequence.cuda() - input_lengths = input_lengths.cuda() - for i in range(3): - with torch.no_grad(): - mel, mel_lengths, _ = jitted_tacotron2(sequence, input_lengths) - _ = waveglow(mel) - - measurements = {} - - sequences_padded, input_lengths = prepare_input_sequence(texts, args.cpu) - - with torch.no_grad(), MeasureTime(measurements, "tacotron2_time", args.cpu): - mel, mel_lengths, alignments = jitted_tacotron2(sequences_padded, input_lengths) - - with torch.no_grad(), MeasureTime(measurements, "waveglow_time", args.cpu): - audios = waveglow(mel, sigma=args.sigma_infer) - audios = audios.float() - with torch.no_grad(), MeasureTime(measurements, "denoiser_time", args.cpu): - audios = denoiser(audios, strength=args.denoising_strength).squeeze(1) - - print("Stopping after",mel.size(2),"decoder steps") - tacotron2_infer_perf = 
mel.size(0)*mel.size(2)/measurements['tacotron2_time'] - waveglow_infer_perf = audios.size(0)*audios.size(1)/measurements['waveglow_time'] - - DLLogger.log(step=0, data={"tacotron2_items_per_sec": tacotron2_infer_perf}) - DLLogger.log(step=0, data={"tacotron2_latency": measurements['tacotron2_time']}) - DLLogger.log(step=0, data={"waveglow_items_per_sec": waveglow_infer_perf}) - DLLogger.log(step=0, data={"waveglow_latency": measurements['waveglow_time']}) - DLLogger.log(step=0, data={"denoiser_latency": measurements['denoiser_time']}) - DLLogger.log(step=0, data={"latency": (measurements['tacotron2_time']+measurements['waveglow_time']+measurements['denoiser_time'])}) - - for i, audio in enumerate(audios): - - plt.imshow(alignments[i].float().data.cpu().numpy().T, aspect="auto", origin="lower") - figure_path = args.output+"alignment_"+str(i)+"_"+args.suffix+".png" - plt.savefig(figure_path) - - audio = audio[:mel_lengths[i]*args.stft_hop_length] - audio = audio/torch.max(torch.abs(audio)) - audio_path = args.output+"audio_"+str(i)+"_"+args.suffix+".wav" - write(audio_path, args.sampling_rate, audio.cpu().numpy()) - - DLLogger.flush() - -if __name__ == '__main__': - main() diff --git a/demo/Tacotron2/inference_perf.py b/demo/Tacotron2/inference_perf.py deleted file mode 100644 index cb13463e..00000000 --- a/demo/Tacotron2/inference_perf.py +++ /dev/null @@ -1,117 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -import models -import torch -import argparse -import numpy as np -import json -import time - -from inference import checkpoint_from_distributed, unwrap_distributed, load_and_setup_model, MeasureTime - -import dllogger as DLLogger -from dllogger import StdOutBackend, JSONStreamBackend, Verbosity - -from apex import amp - -def parse_args(parser): - """ - Parse commandline arguments. - """ - parser.add_argument('-m', '--model-name', type=str, default='', required=True, - help='Model to train') - parser.add_argument('-sr', '--sampling-rate', default=22050, type=int, - help='Sampling rate') - parser.add_argument('--amp-run', action='store_true', - help='Inference with Automatic Mixed Precision') - parser.add_argument('-bs', '--batch-size', type=int, default=1, - help='Batch size') - parser.add_argument('-o', '--output', type=str, required=True, - help='Directory to save results') - parser.add_argument('--log-file', type=str, default='nvlog.json', - help='Filename for logging') - - return parser - - -def main(): - """ - Launches inference benchmark. - Inference is executed on a single GPU. 
- """ - parser = argparse.ArgumentParser( - description='PyTorch Tacotron 2 Inference') - parser = parse_args(parser) - args, _ = parser.parse_known_args() - - log_file = args.log_file - - DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, - args.output+'/'+args.log_file), - StdOutBackend(Verbosity.VERBOSE)]) - for k,v in vars(args).items(): - DLLogger.log(step="PARAMETER", data={k:v}) - DLLogger.log(step="PARAMETER", data={'model_name':'Tacotron2_PyT'}) - - model = load_and_setup_model(args.model_name, parser, None, args.amp_run, - forward_is_infer=True) - - if args.model_name == "Tacotron2": - model = torch.jit.script(model) - - warmup_iters = 3 - num_iters = 1+warmup_iters - - for i in range(num_iters): - - measurements = {} - - if args.model_name == 'Tacotron2': - text_padded = torch.randint(low=0, high=148, size=(args.batch_size, 140), - dtype=torch.long).cuda() - input_lengths = torch.IntTensor([text_padded.size(1)]*args.batch_size).cuda().long() - with torch.no_grad(), MeasureTime(measurements, "inference_time"): - mels, _, _ = model(text_padded, input_lengths) - num_items = mels.size(0)*mels.size(2) - - if args.model_name == 'WaveGlow': - n_mel_channels = model.upsample.in_channels - num_mels = 895 - mel_padded = torch.zeros(args.batch_size, n_mel_channels, - num_mels).normal_(-5.62, 1.98).cuda() - if args.amp_run: - mel_padded = mel_padded.half() - - with torch.no_grad(), MeasureTime(measurements, "inference_time"): - audios = model(mel_padded) - audios = audios.float() - num_items = audios.size(0)*audios.size(1) - - if i >= warmup_iters: - DLLogger.log(step=(i-warmup_iters,), data={"latency": measurements['inference_time']}) - DLLogger.log(step=(i-warmup_iters,), data={"items_per_sec": num_items/measurements['inference_time']}) - - DLLogger.log(step=tuple(), - data={'infer_latency': measurements['inference_time']}) - DLLogger.log(step=tuple(), - data={'infer_items_per_sec': num_items/measurements['inference_time']}) - - DLLogger.flush() - -if 
__name__ == '__main__': - main() diff --git a/demo/Tacotron2/main.py b/demo/Tacotron2/main.py deleted file mode 100644 index 2fee8563..00000000 --- a/demo/Tacotron2/main.py +++ /dev/null @@ -1,43 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -from train import main as main_train -from inference_perf import main as main_infer - -def parse_args(parser): - """ - Parse commandline arguments. - """ - - parser.add_argument('--bench-class', type=str, choices=['train', 'perf-infer', 'perf-train'], required=True, help='Choose test class') - - return parser - -def main(): - - parser = argparse.ArgumentParser(description='PyTorch Tacotron 2 Testing') - parser = parse_args(parser) - args, unknown_args = parser.parse_known_args() - - if "train" in args.bench_class: - main_train() - else: - main_infer() - -if __name__ == '__main__': - main() diff --git a/demo/Tacotron2/models.py b/demo/Tacotron2/models.py deleted file mode 100644 index fad8af46..00000000 --- a/demo/Tacotron2/models.py +++ /dev/null @@ -1,137 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import sys -from os.path import abspath, dirname -# enabling modules discovery from global entrypoint -sys.path.append(abspath(dirname(__file__)+'/')) -from tacotron2.model import Tacotron2 -from waveglow.model import WaveGlow -import torch - - -def parse_model_args(model_name, parser, add_help=False): - if model_name == 'Tacotron2': - from tacotron2.arg_parser import parse_tacotron2_args - return parse_tacotron2_args(parser, add_help) - if model_name == 'WaveGlow': - from waveglow.arg_parser import parse_waveglow_args - return parse_waveglow_args(parser, add_help) - else: - raise NotImplementedError(model_name) - - -def batchnorm_to_float(module): - """Converts batch norm to FP32""" - if isinstance(module, torch.nn.modules.batchnorm._BatchNorm): - module.float() - for child in module.children(): - batchnorm_to_float(child) - return module - - -def init_bn(module): - if isinstance(module, torch.nn.modules.batchnorm._BatchNorm): - if module.affine: - module.weight.data.uniform_() - for child in module.children(): - init_bn(child) - - -def get_model(model_name, model_config, to_cuda, - uniform_initialize_bn_weight=False, forward_is_infer=False): - """ Code chooses a model based on name""" - model = None - if model_name == 'Tacotron2': - if forward_is_infer: - class Tacotron2__forward_is_infer(Tacotron2): - def forward(self, inputs, input_lengths): - return self.infer(inputs, input_lengths) - model = Tacotron2__forward_is_infer(**model_config) - else: - model = Tacotron2(**model_config) - elif model_name == 'WaveGlow': - if forward_is_infer: - class 
WaveGlow__forward_is_infer(WaveGlow): - def forward(self, spect, sigma=1.0): - return self.infer(spect, sigma) - model = WaveGlow__forward_is_infer(**model_config) - else: - model = WaveGlow(**model_config) - else: - raise NotImplementedError(model_name) - - if uniform_initialize_bn_weight: - init_bn(model) - - if to_cuda: - model = model.cuda() - return model - - -def get_model_config(model_name, args): - """ Code chooses a model based on name""" - if model_name == 'Tacotron2': - model_config = dict( - # optimization - mask_padding=args.mask_padding, - # audio - n_mel_channels=args.n_mel_channels, - # symbols - n_symbols=args.n_symbols, - symbols_embedding_dim=args.symbols_embedding_dim, - # encoder - encoder_kernel_size=args.encoder_kernel_size, - encoder_n_convolutions=args.encoder_n_convolutions, - encoder_embedding_dim=args.encoder_embedding_dim, - # attention - attention_rnn_dim=args.attention_rnn_dim, - attention_dim=args.attention_dim, - # attention location - attention_location_n_filters=args.attention_location_n_filters, - attention_location_kernel_size=args.attention_location_kernel_size, - # decoder - n_frames_per_step=args.n_frames_per_step, - decoder_rnn_dim=args.decoder_rnn_dim, - prenet_dim=args.prenet_dim, - max_decoder_steps=args.max_decoder_steps, - gate_threshold=args.gate_threshold, - p_attention_dropout=args.p_attention_dropout, - p_decoder_dropout=args.p_decoder_dropout, - # postnet - postnet_embedding_dim=args.postnet_embedding_dim, - postnet_kernel_size=args.postnet_kernel_size, - postnet_n_convolutions=args.postnet_n_convolutions, - decoder_no_early_stopping=args.decoder_no_early_stopping - ) - return model_config - elif model_name == 'WaveGlow': - model_config = dict( - n_mel_channels=args.n_mel_channels, - n_flows=args.flows, - n_group=args.groups, - n_early_every=args.early_every, - n_early_size=args.early_size, - WN_config=dict( - n_layers=args.wn_layers, - kernel_size=args.wn_kernel_size, - n_channels=args.wn_channels - ) - ) - return 
model_config - else: - raise NotImplementedError(model_name) diff --git a/demo/Tacotron2/multiproc.py b/demo/Tacotron2/multiproc.py deleted file mode 100644 index d3eb63ad..00000000 --- a/demo/Tacotron2/multiproc.py +++ /dev/null @@ -1,75 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import sys -import subprocess - -import torch - - -def main(): - argslist = list(sys.argv)[1:] - world_size = torch.cuda.device_count() - - if '--world-size' in argslist: - argslist[argslist.index('--world-size') + 1] = str(world_size) - else: - argslist.append('--world-size') - argslist.append(str(world_size)) - - workers = [] - - for i in range(world_size): - if '--rank' in argslist: - argslist[argslist.index('--rank') + 1] = str(i) - else: - argslist.append('--rank') - argslist.append(str(i)) - stdout = None if i == 0 else subprocess.DEVNULL - worker = subprocess.Popen( - [str(sys.executable)] + argslist, stdout=stdout) - workers.append(worker) - - returncode = 0 - try: - pending = len(workers) - while pending > 0: - for worker in workers: - try: - worker_returncode = worker.wait(1) - except subprocess.TimeoutExpired: - continue - pending -= 1 - if worker_returncode != 0: - if returncode != 1: - for worker in workers: - worker.terminate() - returncode = 1 - - except KeyboardInterrupt: - print('Pressed CTRL-C, TERMINATING') - 
for worker in workers: - worker.terminate() - for worker in workers: - worker.wait() - raise - - sys.exit(returncode) - - -if __name__ == "__main__": - main() diff --git a/demo/Tacotron2/phrases/phrase.txt b/demo/Tacotron2/phrases/phrase.txt deleted file mode 100644 index 8999934d..00000000 --- a/demo/Tacotron2/phrases/phrase.txt +++ /dev/null @@ -1 +0,0 @@ -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves. diff --git a/demo/Tacotron2/phrases/phrase_1_128.txt b/demo/Tacotron2/phrases/phrase_1_128.txt deleted file mode 100644 index 2bd87ff0..00000000 --- a/demo/Tacotron2/phrases/phrase_1_128.txt +++ /dev/null @@ -1 +0,0 @@ -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the diff --git a/demo/Tacotron2/phrases/phrase_1_256.txt b/demo/Tacotron2/phrases/phrase_1_256.txt deleted file mode 100644 index 8286058e..00000000 --- a/demo/Tacotron2/phrases/phrase_1_256.txt +++ /dev/null @@ -1,2 +0,0 @@ -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. 
- diff --git a/demo/Tacotron2/phrases/phrase_1_64.txt b/demo/Tacotron2/phrases/phrase_1_64.txt deleted file mode 100644 index 817a8a60..00000000 --- a/demo/Tacotron2/phrases/phrase_1_64.txt +++ /dev/null @@ -1 +0,0 @@ -She sells seashells by the seashore, shells she sells are great diff --git a/demo/Tacotron2/phrases/phrase_4_256.txt b/demo/Tacotron2/phrases/phrase_4_256.txt deleted file mode 100644 index 84de94bc..00000000 --- a/demo/Tacotron2/phrases/phrase_4_256.txt +++ /dev/null @@ -1,4 +0,0 @@ -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. 
diff --git a/demo/Tacotron2/phrases/phrase_4_64.txt b/demo/Tacotron2/phrases/phrase_4_64.txt deleted file mode 100644 index cd1d75b5..00000000 --- a/demo/Tacotron2/phrases/phrase_4_64.txt +++ /dev/null @@ -1,4 +0,0 @@ -She sells seashells by the seashore, shells she sells are great -She sells seashells by the seashore, shells she sells are great -She sells seashells by the seashore, shells she sells are great -She sells seashells by the seashore, shells she sells are great diff --git a/demo/Tacotron2/phrases/phrase_8_256.txt b/demo/Tacotron2/phrases/phrase_8_256.txt deleted file mode 100644 index eace2b8e..00000000 --- a/demo/Tacotron2/phrases/phrase_8_256.txt +++ /dev/null @@ -1,8 +0,0 @@ -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. 
-The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. -The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves and the form of printed letters should be beautiful, and that their arrangement on pages. diff --git a/demo/Tacotron2/phrases/phrase_8_64.txt b/demo/Tacotron2/phrases/phrase_8_64.txt deleted file mode 100644 index e3a97a5c..00000000 --- a/demo/Tacotron2/phrases/phrase_8_64.txt +++ /dev/null @@ -1,8 +0,0 @@ -She sells seashells by the seashore, shells she sells are great -She sells seashells by the seashore, shells she sells are great -She sells seashells by the seashore, shells she sells are great -She sells seashells by the seashore, shells she sells are great -She sells seashells by the seashore, shells she sells are great -She sells seashells by the seashore, shells she sells are great -She sells seashells by the seashore, shells she sells are great -She sells seashells by the seashore, shells she sells are great diff --git a/demo/Tacotron2/preprocess_audio2mel.py b/demo/Tacotron2/preprocess_audio2mel.py deleted file mode 100644 index 32026325..00000000 --- a/demo/Tacotron2/preprocess_audio2mel.py +++ /dev/null @@ -1,81 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import torch - -from tacotron2.data_function import TextMelLoader -from common.utils import load_filepaths_and_text - -def parse_args(parser): - """ - Parse commandline arguments. - """ - parser.add_argument('-d', '--dataset-path', type=str, - default='./', help='Path to dataset') - parser.add_argument('--wav-files', required=True, - type=str, help='Path to filelist with audio paths and text') - parser.add_argument('--mel-files', required=True, - type=str, help='Path to filelist with mel paths and text') - parser.add_argument('--text-cleaners', nargs='*', - default=['english_cleaners'], type=str, - help='Type of text cleaners for input text') - parser.add_argument('--max-wav-value', default=32768.0, type=float, - help='Maximum audiowave value') - parser.add_argument('--sampling-rate', default=22050, type=int, - help='Sampling rate') - parser.add_argument('--filter-length', default=1024, type=int, - help='Filter length') - parser.add_argument('--hop-length', default=256, type=int, - help='Hop (stride) length') - parser.add_argument('--win-length', default=1024, type=int, - help='Window length') - parser.add_argument('--mel-fmin', default=0.0, type=float, - help='Minimum mel frequency') - parser.add_argument('--mel-fmax', default=8000.0, type=float, - help='Maximum mel frequency') - parser.add_argument('--n-mel-channels', default=80, type=int, - help='Number of 
bins in mel-spectrograms') - - return parser - - -def audio2mel(dataset_path, audiopaths_and_text, melpaths_and_text, args): - - melpaths_and_text_list = load_filepaths_and_text(dataset_path, melpaths_and_text) - audiopaths_and_text_list = load_filepaths_and_text(dataset_path, audiopaths_and_text) - - data_loader = TextMelLoader(dataset_path, audiopaths_and_text, args) - - for i in range(len(melpaths_and_text_list)): - if i%100 == 0: - print("done", i, "/", len(melpaths_and_text_list)) - - mel = data_loader.get_mel(audiopaths_and_text_list[i][0]) - torch.save(mel, melpaths_and_text_list[i][0]) - -def main(): - - parser = argparse.ArgumentParser(description='PyTorch Tacotron 2 Training') - parser = parse_args(parser) - args = parser.parse_args() - args.load_mel_from_disk = False - - audio2mel(args.dataset_path, args.wav_files, args.mel_files, args) - -if __name__ == '__main__': - main() diff --git a/demo/Tacotron2/requirements.txt b/demo/Tacotron2/requirements.txt deleted file mode 100644 index b6eb26de..00000000 --- a/demo/Tacotron2/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -numba>=0.48 -resampy>=0.3.1 -torch==2.0.1 -matplotlib -numpy -inflect -librosa>=0.10.0 -scipy -Unidecode -git+https://github.com/NVIDIA/dllogger#egg=dllogger ---extra-index-url https://pypi.ngc.nvidia.com -onnx-graphsurgeon diff --git a/demo/Tacotron2/run_latency_tests.sh b/demo/Tacotron2/run_latency_tests.sh deleted file mode 100644 index 85e5f0f8..00000000 --- a/demo/Tacotron2/run_latency_tests.sh +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -unset CUDA_VISIBLE_DEVICES -bash test_infer.sh -bs 1 -il 128 --fp16 --num-iters 1003 --tacotron2 ./checkpoints/tacotron2_1032590_6000_amp --waveglow ./checkpoints/waveglow_1076430_14000_amp --wn-channels 256 -bash test_infer.sh -bs 4 -il 128 --fp16 --num-iters 1003 --tacotron2 ./checkpoints/tacotron2_1032590_6000_amp --waveglow ./checkpoints/waveglow_1076430_14000_amp --wn-channels 256 -bash test_infer.sh -bs 1 -il 128 --num-iters 1003 --tacotron2 ./checkpoints/tacotron2_1032590_6000_amp --waveglow ./checkpoints/waveglow_1076430_14000_amp --wn-channels 256 -bash test_infer.sh -bs 4 -il 128 --num-iters 1003 --tacotron2 ./checkpoints/tacotron2_1032590_6000_amp --waveglow ./checkpoints/waveglow_1076430_14000_amp --wn-channels 256 -export CUDA_VISIBLE_DEVICES= -export OMP_NUM_THREADS=6 -export KMP_BLOCKTIME=0 -export KMP_AFFINITY=granularity=fine,compact,1,0 -bash test_infer.sh -bs 1 -il 128 --cpu --num-iters 1003 --tacotron2 ./checkpoints/tacotron2_1032590_6000_amp --waveglow ./checkpoints/waveglow_1076430_14000_amp --wn-channels 256 -bash test_infer.sh -bs 4 -il 128 --cpu --num-iters 1003 --tacotron2 ./checkpoints/tacotron2_1032590_6000_amp --waveglow ./checkpoints/waveglow_1076430_14000_amp --wn-channels 256 diff --git a/demo/Tacotron2/scripts/download_checkpoints.sh b/demo/Tacotron2/scripts/download_checkpoints.sh deleted file mode 100755 index 0d23f2d3..00000000 --- a/demo/Tacotron2/scripts/download_checkpoints.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Prepare the download directory -mkdir -p checkpoints && cd checkpoints - -# Download the Tacotron2 and Waveglow checkpoints -if [ ! -f "checkpoints/tacotron2_pyt_ckpt_amp_v19.09.0/nvidia_tacotron2pyt_fp16_20190427" ]; then - echo "Downloading Tacotron2 checkpoint from NGC" - ngc registry model download-version nvidia/tacotron2_pyt_ckpt_amp:19.09.0 -fi; -if [ ! -f "checkpoints/waveglow_ckpt_amp_256_v19.10.0/nvidia_waveglow256pyt_fp16" ]; then - echo "Downloading Waveglow checkpoint from NGC" - ngc registry model download-version nvidia/waveglow_ckpt_amp_256:19.10.0 -fi; - -cd - diff --git a/demo/Tacotron2/scripts/inference_benchmark.sh b/demo/Tacotron2/scripts/inference_benchmark.sh deleted file mode 100755 index 86200557..00000000 --- a/demo/Tacotron2/scripts/inference_benchmark.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -echo "TensorRT BS=1, S=128" -bash test_infer.sh --test tensorrt/test_infer_trt.py -bs 1 -il 128 --fp16 --num-iters 103 --encoder ./output/encoder_fp16.engine --decoder ./output/decoder_with_outer_loop_fp16.engine --postnet ./output/postnet_fp16.engine --waveglow ./output/waveglow_fp16.engine --wn-channels 256 -echo "PyTorch (GPU) BS=1, S=128" -bash test_infer.sh -bs 1 -il 128 --fp16 --num-iters 103 --tacotron2 ./checkpoints/tacotron2_pyt_ckpt_amp_v19.09.0/nvidia_tacotron2pyt_fp16_20190427 --waveglow ./checkpoints/waveglow_ckpt_amp_256_v19.10.0/nvidia_waveglow256pyt_fp16 --wn-channels 256 diff --git a/demo/Tacotron2/scripts/install_prerequisites.sh b/demo/Tacotron2/scripts/install_prerequisites.sh deleted file mode 100755 index 5a16d392..00000000 --- a/demo/Tacotron2/scripts/install_prerequisites.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -pip3 install -r requirements.txt -echo "nvidia" | sudo -S apt-get install -y libsndfile1 - -pushd /tmp -git clone https://github.com/NVIDIA/apex -cd apex -pip3 install -v --disable-pip-version-check --no-build-isolation --no-cache-dir ./ -popd diff --git a/demo/Tacotron2/scripts/prepare_dataset.sh b/demo/Tacotron2/scripts/prepare_dataset.sh deleted file mode 100755 index d38be817..00000000 --- a/demo/Tacotron2/scripts/prepare_dataset.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -set -e - -DATADIR="LJSpeech-1.1" -BZ2ARCHIVE="${DATADIR}.tar.bz2" -ENDPOINT="http://data.keithito.com/data/speech/$BZ2ARCHIVE" - -if [ ! -d "$DATADIR" ]; then - echo "dataset is missing, unpacking ..." - if [ ! -f "$BZ2ARCHIVE" ]; then - echo "dataset archive is missing, downloading ..." - wget "$ENDPOINT" - fi - tar jxvf "$BZ2ARCHIVE" -fi diff --git a/demo/Tacotron2/scripts/prepare_mels.sh b/demo/Tacotron2/scripts/prepare_mels.sh deleted file mode 100644 index b3843a26..00000000 --- a/demo/Tacotron2/scripts/prepare_mels.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -set -e - -DATADIR="LJSpeech-1.1" -FILELISTSDIR="filelists" - -TESTLIST="$FILELISTSDIR/ljs_audio_text_test_filelist.txt" -TRAINLIST="$FILELISTSDIR/ljs_audio_text_train_filelist.txt" -VALLIST="$FILELISTSDIR/ljs_audio_text_val_filelist.txt" - -TESTLIST_MEL="$FILELISTSDIR/ljs_mel_text_test_filelist.txt" -TRAINLIST_MEL="$FILELISTSDIR/ljs_mel_text_train_filelist.txt" -VALLIST_MEL="$FILELISTSDIR/ljs_mel_text_val_filelist.txt" - -mkdir -p "$DATADIR/mels" -if [ $(ls $DATADIR/mels | wc -l) -ne 13100 ]; then - python3 preprocess_audio2mel.py --wav-files "$TRAINLIST" --mel-files "$TRAINLIST_MEL" - python3 preprocess_audio2mel.py --wav-files "$TESTLIST" --mel-files "$TESTLIST_MEL" - python3 preprocess_audio2mel.py --wav-files "$VALLIST" --mel-files "$VALLIST_MEL" -fi diff --git a/demo/Tacotron2/tacotron2/arg_parser.py b/demo/Tacotron2/tacotron2/arg_parser.py deleted file mode 100644 index 2a450ef6..00000000 --- a/demo/Tacotron2/tacotron2/arg_parser.py +++ /dev/null @@ -1,98 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse - -from tacotron2.text import symbols - - -def parse_tacotron2_args(parent, add_help=False): - """ - Parse commandline arguments. - """ - parser = argparse.ArgumentParser(parents=[parent], add_help=add_help) - - # misc parameters - parser.add_argument('--mask-padding', default=False, type=bool, - help='Use mask padding') - parser.add_argument('--n-mel-channels', default=80, type=int, - help='Number of bins in mel-spectrograms') - - # symbols parameters - global symbols - len_symbols = len(symbols) - symbols = parser.add_argument_group('symbols parameters') - symbols.add_argument('--n-symbols', default=len_symbols, type=int, - help='Number of symbols in dictionary') - symbols.add_argument('--symbols-embedding-dim', default=512, type=int, - help='Input embedding dimension') - - # encoder parameters - encoder = parser.add_argument_group('encoder parameters') - encoder.add_argument('--encoder-kernel-size', default=5, type=int, - help='Encoder kernel size') - encoder.add_argument('--encoder-n-convolutions', default=3, type=int, - help='Number of encoder convolutions') - encoder.add_argument('--encoder-embedding-dim', default=512, type=int, - help='Encoder embedding dimension') - - # decoder parameters - decoder = parser.add_argument_group('decoder parameters') - decoder.add_argument('--n-frames-per-step', default=1, - type=int, - help='Number of frames processed per step') # currently only 1 is supported - decoder.add_argument('--decoder-rnn-dim', default=1024, type=int, - help='Number of units in decoder LSTM') - 
decoder.add_argument('--prenet-dim', default=256, type=int, - help='Number of ReLU units in prenet layers') - decoder.add_argument('--max-decoder-steps', default=2000, type=int, - help='Maximum number of output mel spectrograms') - decoder.add_argument('--gate-threshold', default=0.5, type=float, - help='Probability threshold for stop token') - decoder.add_argument('--p-attention-dropout', default=0.1, type=float, - help='Dropout probability for attention LSTM') - decoder.add_argument('--p-decoder-dropout', default=0.1, type=float, - help='Dropout probability for decoder LSTM') - decoder.add_argument('--decoder-no-early-stopping', action='store_true', - help='Stop decoding once all samples are finished') - - # attention parameters - attention = parser.add_argument_group('attention parameters') - attention.add_argument('--attention-rnn-dim', default=1024, type=int, - help='Number of units in attention LSTM') - attention.add_argument('--attention-dim', default=128, type=int, - help='Dimension of attention hidden representation') - - # location layer parameters - location = parser.add_argument_group('location parameters') - location.add_argument( - '--attention-location-n-filters', default=32, type=int, - help='Number of filters for location-sensitive attention') - location.add_argument( - '--attention-location-kernel-size', default=31, type=int, - help='Kernel size for location-sensitive attention') - - # Mel-post processing network parameters - postnet = parser.add_argument_group('postnet parameters') - postnet.add_argument('--postnet-embedding-dim', default=512, type=int, - help='Postnet embedding dimension') - postnet.add_argument('--postnet-kernel-size', default=5, type=int, - help='Postnet kernel size') - postnet.add_argument('--postnet-n-convolutions', default=5, type=int, - help='Number of postnet convolutions') - - return parser diff --git a/demo/Tacotron2/tacotron2/data_function.py b/demo/Tacotron2/tacotron2/data_function.py deleted file mode 100644 index 
5d2c0064..00000000 --- a/demo/Tacotron2/tacotron2/data_function.py +++ /dev/null @@ -1,145 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import random -import numpy as np -import torch -import torch.utils.data - -import common.layers as layers -from common.utils import load_wav_to_torch, load_filepaths_and_text, to_gpu -from tacotron2.text import text_to_sequence - -class TextMelLoader(torch.utils.data.Dataset): - """ - 1) loads audio,text pairs - 2) normalizes text and converts them to sequences of one-hot vectors - 3) computes mel-spectrograms from audio files. 
- """ - def __init__(self, dataset_path, audiopaths_and_text, args): - self.audiopaths_and_text = load_filepaths_and_text(dataset_path, audiopaths_and_text) - self.text_cleaners = args.text_cleaners - self.max_wav_value = args.max_wav_value - self.sampling_rate = args.sampling_rate - self.load_mel_from_disk = args.load_mel_from_disk - self.stft = layers.TacotronSTFT( - args.filter_length, args.hop_length, args.win_length, - args.n_mel_channels, args.sampling_rate, args.mel_fmin, - args.mel_fmax) - random.seed(1234) - random.shuffle(self.audiopaths_and_text) - - def get_mel_text_pair(self, audiopath_and_text): - # separate filename and text - audiopath, text = audiopath_and_text[0], audiopath_and_text[1] - len_text = len(text) - text = self.get_text(text) - mel = self.get_mel(audiopath) - return (text, mel, len_text) - - def get_mel(self, filename): - if not self.load_mel_from_disk: - audio, sampling_rate = load_wav_to_torch(filename) - if sampling_rate != self.stft.sampling_rate: - raise ValueError("{} {} SR doesn't match target {} SR".format( - sampling_rate, self.stft.sampling_rate)) - audio_norm = audio / self.max_wav_value - audio_norm = audio_norm.unsqueeze(0) - audio_norm = torch.autograd.Variable(audio_norm, requires_grad=False) - melspec = self.stft.mel_spectrogram(audio_norm) - melspec = torch.squeeze(melspec, 0) - else: - melspec = torch.load(filename) - assert melspec.size(0) == self.stft.n_mel_channels, ( - 'Mel dimension mismatch: given {}, expected {}'.format( - melspec.size(0), self.stft.n_mel_channels)) - - return melspec - - def get_text(self, text): - text_norm = torch.IntTensor(text_to_sequence(text, self.text_cleaners)) - return text_norm - - def __getitem__(self, index): - return self.get_mel_text_pair(self.audiopaths_and_text[index]) - - def __len__(self): - return len(self.audiopaths_and_text) - - -class TextMelCollate(): - """ Zero-pads model inputs and targets based on number of frames per setep - """ - def __init__(self, 
n_frames_per_step): - self.n_frames_per_step = n_frames_per_step - - def __call__(self, batch): - """Collate's training batch from normalized text and mel-spectrogram - PARAMS - ------ - batch: [text_normalized, mel_normalized] - """ - # Right zero-pad all one-hot text sequences to max input length - input_lengths, ids_sorted_decreasing = torch.sort( - torch.LongTensor([len(x[0]) for x in batch]), - dim=0, descending=True) - max_input_len = input_lengths[0] - - text_padded = torch.LongTensor(len(batch), max_input_len) - text_padded.zero_() - for i in range(len(ids_sorted_decreasing)): - text = batch[ids_sorted_decreasing[i]][0] - text_padded[i, :text.size(0)] = text - - # Right zero-pad mel-spec - num_mels = batch[0][1].size(0) - max_target_len = max([x[1].size(1) for x in batch]) - if max_target_len % self.n_frames_per_step != 0: - max_target_len += self.n_frames_per_step - max_target_len % self.n_frames_per_step - assert max_target_len % self.n_frames_per_step == 0 - - # include mel padded and gate padded - mel_padded = torch.FloatTensor(len(batch), num_mels, max_target_len) - mel_padded.zero_() - gate_padded = torch.FloatTensor(len(batch), max_target_len) - gate_padded.zero_() - output_lengths = torch.LongTensor(len(batch)) - for i in range(len(ids_sorted_decreasing)): - mel = batch[ids_sorted_decreasing[i]][1] - mel_padded[i, :, :mel.size(1)] = mel - gate_padded[i, mel.size(1)-1:] = 1 - output_lengths[i] = mel.size(1) - - # count number of items - characters in text - len_x = [x[2] for x in batch] - len_x = torch.Tensor(len_x) - return text_padded, input_lengths, mel_padded, gate_padded, \ - output_lengths, len_x - -def batch_to_gpu(batch): - text_padded, input_lengths, mel_padded, gate_padded, \ - output_lengths, len_x = batch - text_padded = to_gpu(text_padded).long() - input_lengths = to_gpu(input_lengths).long() - max_len = torch.max(input_lengths.data).item() - mel_padded = to_gpu(mel_padded).float() - gate_padded = to_gpu(gate_padded).float() - 
output_lengths = to_gpu(output_lengths).long() - x = (text_padded, input_lengths, mel_padded, max_len, output_lengths) - y = (mel_padded, gate_padded) - len_x = torch.sum(output_lengths) - return (x, y, len_x) diff --git a/demo/Tacotron2/tacotron2/loss_function.py b/demo/Tacotron2/tacotron2/loss_function.py deleted file mode 100644 index 07b3610e..00000000 --- a/demo/Tacotron2/tacotron2/loss_function.py +++ /dev/null @@ -1,36 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from torch import nn - - -class Tacotron2Loss(nn.Module): - def __init__(self): - super(Tacotron2Loss, self).__init__() - - def forward(self, model_output, targets): - mel_target, gate_target = targets[0], targets[1] - mel_target.requires_grad = False - gate_target.requires_grad = False - gate_target = gate_target.view(-1, 1) - - mel_out, mel_out_postnet, gate_out, _ = model_output - gate_out = gate_out.view(-1, 1) - mel_loss = nn.MSELoss()(mel_out, mel_target) + \ - nn.MSELoss()(mel_out_postnet, mel_target) - gate_loss = nn.BCEWithLogitsLoss()(gate_out, gate_target) - return mel_loss + gate_loss diff --git a/demo/Tacotron2/tacotron2/model.py b/demo/Tacotron2/tacotron2/model.py deleted file mode 100644 index c8ba9f96..00000000 --- a/demo/Tacotron2/tacotron2/model.py +++ /dev/null @@ -1,681 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from math import sqrt -import torch -from torch import nn -from torch.nn import functional as F -import sys -from os.path import abspath, dirname -# enabling modules discovery from global entrypoint -sys.path.append(abspath(dirname(__file__)+'/../')) -from common.layers import ConvNorm, LinearNorm -from common.utils import to_gpu, get_mask_from_lengths - - -class LocationLayer(nn.Module): - def __init__(self, attention_n_filters, attention_kernel_size, - attention_dim): - super(LocationLayer, self).__init__() - padding = int((attention_kernel_size - 1) / 2) - self.location_conv = ConvNorm(2, attention_n_filters, - kernel_size=attention_kernel_size, - padding=padding, bias=False, stride=1, - dilation=1) - self.location_dense = LinearNorm(attention_n_filters, attention_dim, - bias=False, w_init_gain='tanh') - - def forward(self, attention_weights_cat): - processed_attention = self.location_conv(attention_weights_cat) - processed_attention = processed_attention.transpose(1, 2) - processed_attention = self.location_dense(processed_attention) - return processed_attention - - -class Attention(nn.Module): - def __init__(self, attention_rnn_dim, embedding_dim, - attention_dim, attention_location_n_filters, - attention_location_kernel_size): - super(Attention, self).__init__() - self.query_layer = LinearNorm(attention_rnn_dim, attention_dim, - bias=False, w_init_gain='tanh') - self.memory_layer = LinearNorm(embedding_dim, attention_dim, bias=False, - w_init_gain='tanh') - self.v = LinearNorm(attention_dim, 1, bias=False) - self.location_layer = LocationLayer(attention_location_n_filters, - attention_location_kernel_size, - attention_dim) - self.score_mask_value = -float("inf") - - def get_alignment_energies(self, query, processed_memory, - attention_weights_cat): - """ - PARAMS - ------ - query: decoder output (batch, n_mel_channels * n_frames_per_step) - processed_memory: processed encoder outputs (B, T_in, attention_dim) - attention_weights_cat: cumulative and prev. 
att weights (B, 2, max_time) - - RETURNS - ------- - alignment (batch, max_time) - """ - - processed_query = self.query_layer(query.unsqueeze(1)) - processed_attention_weights = self.location_layer(attention_weights_cat) - energies = self.v(torch.tanh( - processed_query + processed_attention_weights + processed_memory)) - - energies = energies.squeeze(2) - return energies - - def forward(self, attention_hidden_state, memory, processed_memory, - attention_weights_cat, mask): - """ - PARAMS - ------ - attention_hidden_state: attention rnn last output - memory: encoder outputs - processed_memory: processed encoder outputs - attention_weights_cat: previous and cummulative attention weights - mask: binary mask for padded data - """ - alignment = self.get_alignment_energies( - attention_hidden_state, processed_memory, attention_weights_cat) - - alignment = alignment.masked_fill(mask, self.score_mask_value) - - attention_weights = F.softmax(alignment, dim=1) - attention_context = torch.bmm(attention_weights.unsqueeze(1), memory) - attention_context = attention_context.squeeze(1) - - return attention_context, attention_weights - - -class Prenet(nn.Module): - def __init__(self, in_dim, sizes): - super(Prenet, self).__init__() - in_sizes = [in_dim] + sizes[:-1] - self.layers = nn.ModuleList( - [LinearNorm(in_size, out_size, bias=False) - for (in_size, out_size) in zip(in_sizes, sizes)]) - - def forward(self, x): - for linear in self.layers: - x = F.dropout(F.relu(linear(x)), p=0.5, training=True) - return x - - -class Postnet(nn.Module): - """Postnet - - Five 1-d convolution with 512 channels and kernel size 5 - """ - - def __init__(self, n_mel_channels, postnet_embedding_dim, - postnet_kernel_size, postnet_n_convolutions): - super(Postnet, self).__init__() - self.convolutions = nn.ModuleList() - - self.convolutions.append( - nn.Sequential( - ConvNorm(n_mel_channels, postnet_embedding_dim, - kernel_size=postnet_kernel_size, stride=1, - padding=int((postnet_kernel_size - 1) / 
2), - dilation=1, w_init_gain='tanh'), - nn.BatchNorm1d(postnet_embedding_dim)) - ) - - for i in range(1, postnet_n_convolutions - 1): - self.convolutions.append( - nn.Sequential( - ConvNorm(postnet_embedding_dim, - postnet_embedding_dim, - kernel_size=postnet_kernel_size, stride=1, - padding=int((postnet_kernel_size - 1) / 2), - dilation=1, w_init_gain='tanh'), - nn.BatchNorm1d(postnet_embedding_dim)) - ) - - self.convolutions.append( - nn.Sequential( - ConvNorm(postnet_embedding_dim, n_mel_channels, - kernel_size=postnet_kernel_size, stride=1, - padding=int((postnet_kernel_size - 1) / 2), - dilation=1, w_init_gain='linear'), - nn.BatchNorm1d(n_mel_channels)) - ) - self.n_convs = len(self.convolutions) - - def forward(self, x): - i = 0 - for conv in self.convolutions: - if i < self.n_convs - 1: - x = F.dropout(torch.tanh(conv(x)), 0.5, training=self.training) - else: - x = F.dropout(conv(x), 0.5, training=self.training) - i += 1 - - return x - - -class Encoder(nn.Module): - """Encoder module: - - Three 1-d convolution banks - - Bidirectional LSTM - """ - def __init__(self, encoder_n_convolutions, - encoder_embedding_dim, encoder_kernel_size): - super(Encoder, self).__init__() - - convolutions = [] - for _ in range(encoder_n_convolutions): - conv_layer = nn.Sequential( - ConvNorm(encoder_embedding_dim, - encoder_embedding_dim, - kernel_size=encoder_kernel_size, stride=1, - padding=int((encoder_kernel_size - 1) / 2), - dilation=1, w_init_gain='relu'), - nn.BatchNorm1d(encoder_embedding_dim)) - convolutions.append(conv_layer) - self.convolutions = nn.ModuleList(convolutions) - - self.lstm = nn.LSTM(encoder_embedding_dim, - int(encoder_embedding_dim / 2), 1, - batch_first=True, bidirectional=True) - - @torch.jit.ignore - def forward(self, x, input_lengths): - for conv in self.convolutions: - x = F.dropout(F.relu(conv(x)), 0.5, self.training) - - x = x.transpose(1, 2) - - # pytorch tensor are not reversible, hence the conversion - input_lengths = 
input_lengths.cpu().numpy() - x = nn.utils.rnn.pack_padded_sequence( - x, input_lengths, batch_first=True) - - self.lstm.flatten_parameters() - outputs, _ = self.lstm(x) - - outputs, _ = nn.utils.rnn.pad_packed_sequence( - outputs, batch_first=True) - - return outputs - - @torch.jit.export - def infer(self, x, input_lengths): - device = x.device - for conv in self.convolutions: - x = F.dropout(F.relu(conv(x.to(device))), 0.5, self.training) - - x = x.transpose(1, 2) - - input_lengths = input_lengths.cpu() - x = nn.utils.rnn.pack_padded_sequence( - x, input_lengths, batch_first=True) - - outputs, _ = self.lstm(x) - - outputs, _ = nn.utils.rnn.pad_packed_sequence( - outputs, batch_first=True) - - return outputs - - -class Decoder(nn.Module): - def __init__(self, n_mel_channels, n_frames_per_step, - encoder_embedding_dim, attention_dim, - attention_location_n_filters, - attention_location_kernel_size, - attention_rnn_dim, decoder_rnn_dim, - prenet_dim, max_decoder_steps, gate_threshold, - p_attention_dropout, p_decoder_dropout, - early_stopping): - super(Decoder, self).__init__() - self.n_mel_channels = n_mel_channels - self.n_frames_per_step = n_frames_per_step - self.encoder_embedding_dim = encoder_embedding_dim - self.attention_rnn_dim = attention_rnn_dim - self.decoder_rnn_dim = decoder_rnn_dim - self.prenet_dim = prenet_dim - self.max_decoder_steps = max_decoder_steps - self.gate_threshold = gate_threshold - self.p_attention_dropout = p_attention_dropout - self.p_decoder_dropout = p_decoder_dropout - self.early_stopping = early_stopping - - self.prenet = Prenet( - n_mel_channels * n_frames_per_step, - [prenet_dim, prenet_dim]) - - self.attention_rnn = nn.LSTMCell( - prenet_dim + encoder_embedding_dim, - attention_rnn_dim) - - self.attention_layer = Attention( - attention_rnn_dim, encoder_embedding_dim, - attention_dim, attention_location_n_filters, - attention_location_kernel_size) - - self.decoder_rnn = nn.LSTMCell( - attention_rnn_dim + encoder_embedding_dim, - 
decoder_rnn_dim, 1) - - self.linear_projection = LinearNorm( - decoder_rnn_dim + encoder_embedding_dim, - n_mel_channels * n_frames_per_step) - - self.gate_layer = LinearNorm( - decoder_rnn_dim + encoder_embedding_dim, 1, - bias=True, w_init_gain='sigmoid') - - def get_go_frame(self, memory): - """ Gets all zeros frames to use as first decoder input - PARAMS - ------ - memory: decoder outputs - - RETURNS - ------- - decoder_input: all zeros frames - """ - B = memory.size(0) - dtype = memory.dtype - device = memory.device - decoder_input = torch.zeros( - B, self.n_mel_channels*self.n_frames_per_step, - dtype=dtype, device=device) - return decoder_input - - def initialize_decoder_states(self, memory): - """ Initializes attention rnn states, decoder rnn states, attention - weights, attention cumulative weights, attention context, stores memory - and stores processed memory - PARAMS - ------ - memory: Encoder outputs - mask: Mask for padded data if training, expects None for inference - """ - B = memory.size(0) - MAX_TIME = memory.size(1) - dtype = memory.dtype - device = memory.device - - attention_hidden = torch.zeros( - B, self.attention_rnn_dim, dtype=dtype, device=device) - attention_cell = torch.zeros( - B, self.attention_rnn_dim, dtype=dtype, device=device) - - decoder_hidden = torch.zeros( - B, self.decoder_rnn_dim, dtype=dtype, device=device) - decoder_cell = torch.zeros( - B, self.decoder_rnn_dim, dtype=dtype, device=device) - - attention_weights = torch.zeros( - B, MAX_TIME, dtype=dtype, device=device) - attention_weights_cum = torch.zeros( - B, MAX_TIME, dtype=dtype, device=device) - attention_context = torch.zeros( - B, self.encoder_embedding_dim, dtype=dtype, device=device) - - processed_memory = self.attention_layer.memory_layer(memory) - - return (attention_hidden, attention_cell, decoder_hidden, - decoder_cell, attention_weights, attention_weights_cum, - attention_context, processed_memory) - - def parse_decoder_inputs(self, decoder_inputs): - """ 
Prepares decoder inputs, i.e. mel outputs - PARAMS - ------ - decoder_inputs: inputs used for teacher-forced training, i.e. mel-specs - - RETURNS - ------- - inputs: processed decoder inputs - - """ - # (B, n_mel_channels, T_out) -> (B, T_out, n_mel_channels) - decoder_inputs = decoder_inputs.transpose(1, 2) - decoder_inputs = decoder_inputs.view( - decoder_inputs.size(0), - int(decoder_inputs.size(1)/self.n_frames_per_step), -1) - # (B, T_out, n_mel_channels) -> (T_out, B, n_mel_channels) - decoder_inputs = decoder_inputs.transpose(0, 1) - return decoder_inputs - - def parse_decoder_outputs(self, mel_outputs, gate_outputs, alignments): - """ Prepares decoder outputs for output - PARAMS - ------ - mel_outputs: - gate_outputs: gate output energies - alignments: - - RETURNS - ------- - mel_outputs: - gate_outpust: gate output energies - alignments: - """ - # (T_out, B) -> (B, T_out) - alignments = alignments.transpose(0, 1).contiguous() - # (T_out, B) -> (B, T_out) - gate_outputs = gate_outputs.transpose(0, 1).contiguous() - # (T_out, B, n_mel_channels) -> (B, T_out, n_mel_channels) - mel_outputs = mel_outputs.transpose(0, 1).contiguous() - # decouple frames per step - shape = (mel_outputs.shape[0], -1, self.n_mel_channels) - mel_outputs = mel_outputs.view(*shape) - # (B, T_out, n_mel_channels) -> (B, n_mel_channels, T_out) - mel_outputs = mel_outputs.transpose(1, 2) - - return mel_outputs, gate_outputs, alignments - - def decode(self, decoder_input, attention_hidden, attention_cell, - decoder_hidden, decoder_cell, attention_weights, - attention_weights_cum, attention_context, memory, - processed_memory, mask): - """ Decoder step using stored states, attention and memory - PARAMS - ------ - decoder_input: previous mel output - - RETURNS - ------- - mel_output: - gate_output: gate output energies - attention_weights: - """ - cell_input = torch.cat((decoder_input, attention_context), -1) - - attention_hidden, attention_cell = self.attention_rnn( - cell_input, 
(attention_hidden, attention_cell)) - attention_hidden = F.dropout( - attention_hidden, self.p_attention_dropout, self.training) - - attention_weights_cat = torch.cat( - (attention_weights.unsqueeze(1), - attention_weights_cum.unsqueeze(1)), dim=1) - attention_context, attention_weights = self.attention_layer( - attention_hidden, memory, processed_memory, - attention_weights_cat, mask) - - attention_weights_cum += attention_weights - decoder_input = torch.cat( - (attention_hidden, attention_context), -1) - - decoder_hidden, decoder_cell = self.decoder_rnn( - decoder_input, (decoder_hidden, decoder_cell)) - decoder_hidden = F.dropout( - decoder_hidden, self.p_decoder_dropout, self.training) - - decoder_hidden_attention_context = torch.cat( - (decoder_hidden, attention_context), dim=1) - decoder_output = self.linear_projection( - decoder_hidden_attention_context) - - gate_prediction = self.gate_layer(decoder_hidden_attention_context) - - return (decoder_output, gate_prediction, attention_hidden, - attention_cell, decoder_hidden, decoder_cell, attention_weights, - attention_weights_cum, attention_context) - - @torch.jit.ignore - def forward(self, memory, decoder_inputs, memory_lengths): - """ Decoder forward pass for training - PARAMS - ------ - memory: Encoder outputs - decoder_inputs: Decoder inputs for teacher forcing. i.e. mel-specs - memory_lengths: Encoder output lengths for attention masking. 
- - RETURNS - ------- - mel_outputs: mel outputs from the decoder - gate_outputs: gate outputs from the decoder - alignments: sequence of attention weights from the decoder - """ - - decoder_input = self.get_go_frame(memory).unsqueeze(0) - decoder_inputs = self.parse_decoder_inputs(decoder_inputs) - decoder_inputs = torch.cat((decoder_input, decoder_inputs), dim=0) - decoder_inputs = self.prenet(decoder_inputs) - - mask = get_mask_from_lengths(memory_lengths) - (attention_hidden, - attention_cell, - decoder_hidden, - decoder_cell, - attention_weights, - attention_weights_cum, - attention_context, - processed_memory) = self.initialize_decoder_states(memory) - - mel_outputs, gate_outputs, alignments = [], [], [] - while len(mel_outputs) < decoder_inputs.size(0) - 1: - decoder_input = decoder_inputs[len(mel_outputs)] - (mel_output, - gate_output, - attention_hidden, - attention_cell, - decoder_hidden, - decoder_cell, - attention_weights, - attention_weights_cum, - attention_context) = self.decode(decoder_input, - attention_hidden, - attention_cell, - decoder_hidden, - decoder_cell, - attention_weights, - attention_weights_cum, - attention_context, - memory, - processed_memory, - mask) - - mel_outputs += [mel_output.squeeze(1)] - gate_outputs += [gate_output.squeeze()] - alignments += [attention_weights] - - mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs( - torch.stack(mel_outputs), - torch.stack(gate_outputs), - torch.stack(alignments)) - - return mel_outputs, gate_outputs, alignments - - @torch.jit.export - def infer(self, memory, memory_lengths): - """ Decoder inference - PARAMS - ------ - memory: Encoder outputs - - RETURNS - ------- - mel_outputs: mel outputs from the decoder - gate_outputs: gate outputs from the decoder - alignments: sequence of attention weights from the decoder - """ - decoder_input = self.get_go_frame(memory) - - mask = get_mask_from_lengths(memory_lengths) - (attention_hidden, - attention_cell, - decoder_hidden, - 
decoder_cell, - attention_weights, - attention_weights_cum, - attention_context, - processed_memory) = self.initialize_decoder_states(memory) - - mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32, device=memory.device) - not_finished = torch.ones([memory.size(0)], dtype=torch.int32, device=memory.device) - - mel_outputs, gate_outputs, alignments = ( - torch.zeros(1), torch.zeros(1), torch.zeros(1)) - first_iter = True - while True: - decoder_input = self.prenet(decoder_input) - (mel_output, - gate_output, - attention_hidden, - attention_cell, - decoder_hidden, - decoder_cell, - attention_weights, - attention_weights_cum, - attention_context) = self.decode(decoder_input, - attention_hidden, - attention_cell, - decoder_hidden, - decoder_cell, - attention_weights, - attention_weights_cum, - attention_context, - memory, - processed_memory, - mask) - - if first_iter: - mel_outputs = mel_output.unsqueeze(0) - gate_outputs = gate_output - alignments = attention_weights - first_iter = False - else: - mel_outputs = torch.cat( - (mel_outputs, mel_output.unsqueeze(0)), dim=0) - gate_outputs = torch.cat((gate_outputs, gate_output), dim=0) - alignments = torch.cat((alignments, attention_weights), dim=0) - - dec = torch.le(torch.sigmoid(gate_output), - self.gate_threshold).to(torch.int32).squeeze(1) - - not_finished = not_finished*dec - mel_lengths += not_finished - - if self.early_stopping and torch.sum(not_finished) == 0: - break - if len(mel_outputs) == self.max_decoder_steps: - print("Warning! 
Reached max decoder steps") - break - - decoder_input = mel_output - - mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs( - mel_outputs, gate_outputs, alignments) - - return mel_outputs, gate_outputs, alignments, mel_lengths - - -class Tacotron2(nn.Module): - def __init__(self, mask_padding, n_mel_channels, - n_symbols, symbols_embedding_dim, encoder_kernel_size, - encoder_n_convolutions, encoder_embedding_dim, - attention_rnn_dim, attention_dim, attention_location_n_filters, - attention_location_kernel_size, n_frames_per_step, - decoder_rnn_dim, prenet_dim, max_decoder_steps, gate_threshold, - p_attention_dropout, p_decoder_dropout, - postnet_embedding_dim, postnet_kernel_size, - postnet_n_convolutions, decoder_no_early_stopping): - super(Tacotron2, self).__init__() - self.mask_padding = mask_padding - self.n_mel_channels = n_mel_channels - self.n_frames_per_step = n_frames_per_step - self.embedding = nn.Embedding(n_symbols, symbols_embedding_dim) - std = sqrt(2.0 / (n_symbols + symbols_embedding_dim)) - val = sqrt(3.0) * std # uniform bounds for std - self.embedding.weight.data.uniform_(-val, val) - self.encoder = Encoder(encoder_n_convolutions, - encoder_embedding_dim, - encoder_kernel_size) - self.decoder = Decoder(n_mel_channels, n_frames_per_step, - encoder_embedding_dim, attention_dim, - attention_location_n_filters, - attention_location_kernel_size, - attention_rnn_dim, decoder_rnn_dim, - prenet_dim, max_decoder_steps, - gate_threshold, p_attention_dropout, - p_decoder_dropout, - not decoder_no_early_stopping) - self.postnet = Postnet(n_mel_channels, postnet_embedding_dim, - postnet_kernel_size, - postnet_n_convolutions) - - def parse_batch(self, batch): - text_padded, input_lengths, mel_padded, gate_padded, \ - output_lengths = batch - text_padded = to_gpu(text_padded).long() - input_lengths = to_gpu(input_lengths).long() - max_len = torch.max(input_lengths.data).item() - mel_padded = to_gpu(mel_padded).float() - gate_padded = 
to_gpu(gate_padded).float() - output_lengths = to_gpu(output_lengths).long() - - return ( - (text_padded, input_lengths, mel_padded, max_len, output_lengths), - (mel_padded, gate_padded)) - - def parse_output(self, outputs, output_lengths): - # type: (List[Tensor], Tensor) -> List[Tensor] - if self.mask_padding and output_lengths is not None: - mask = get_mask_from_lengths(output_lengths) - mask = mask.expand(self.n_mel_channels, mask.size(0), mask.size(1)) - mask = mask.permute(1, 0, 2) - - outputs[0].masked_fill_(mask, 0.0) - outputs[1].masked_fill_(mask, 0.0) - outputs[2].masked_fill_(mask[:, 0, :], 1e3) # gate energies - - return outputs - - def forward(self, inputs): - inputs, input_lengths, targets, max_len, output_lengths = inputs - input_lengths, output_lengths = input_lengths.data, output_lengths.data - - embedded_inputs = self.embedding(inputs).transpose(1, 2) - - encoder_outputs = self.encoder(embedded_inputs, input_lengths) - - mel_outputs, gate_outputs, alignments = self.decoder( - encoder_outputs, targets, memory_lengths=input_lengths) - - mel_outputs_postnet = self.postnet(mel_outputs) - mel_outputs_postnet = mel_outputs + mel_outputs_postnet - - return self.parse_output( - [mel_outputs, mel_outputs_postnet, gate_outputs, alignments], - output_lengths) - - - def infer(self, inputs, input_lengths): - - embedded_inputs = self.embedding(inputs).transpose(1, 2) - encoder_outputs = self.encoder.infer(embedded_inputs, input_lengths) - mel_outputs, gate_outputs, alignments, mel_lengths = self.decoder.infer( - encoder_outputs, input_lengths) - - mel_outputs_postnet = self.postnet(mel_outputs) - mel_outputs_postnet = mel_outputs + mel_outputs_postnet - - BS = mel_outputs_postnet.size(0) - alignments = alignments.unfold(1, BS, BS).transpose(0,2) - - return mel_outputs_postnet, mel_lengths, alignments diff --git a/demo/Tacotron2/tacotron2/text/LICENCE b/demo/Tacotron2/tacotron2/text/LICENCE deleted file mode 100644 index 8ac1abf2..00000000 --- 
a/demo/Tacotron2/tacotron2/text/LICENCE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2017 Keith Ito - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/demo/Tacotron2/tacotron2/text/__init__.py b/demo/Tacotron2/tacotron2/text/__init__.py deleted file mode 100644 index f81bab41..00000000 --- a/demo/Tacotron2/tacotron2/text/__init__.py +++ /dev/null @@ -1,74 +0,0 @@ -""" from https://github.com/keithito/tacotron """ -import re -from tacotron2.text import cleaners -from tacotron2.text.symbols import symbols - - -# Mappings from symbol to numeric ID and vice versa: -_symbol_to_id = {s: i for i, s in enumerate(symbols)} -_id_to_symbol = {i: s for i, s in enumerate(symbols)} - -# Regular expression matching text enclosed in curly braces: -_curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)') - - -def text_to_sequence(text, cleaner_names): - '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text. 
- - The text can optionally have ARPAbet sequences enclosed in curly braces embedded - in it. For example, "Turn left on {HH AW1 S S T AH0 N} Street." - - Args: - text: string to convert to a sequence - cleaner_names: names of the cleaner functions to run the text through - - Returns: - List of integers corresponding to the symbols in the text - ''' - sequence = [] - - # Check for curly braces and treat their contents as ARPAbet: - while len(text): - m = _curly_re.match(text) - if not m: - sequence += _symbols_to_sequence(_clean_text(text, cleaner_names)) - break - sequence += _symbols_to_sequence(_clean_text(m.group(1), cleaner_names)) - sequence += _arpabet_to_sequence(m.group(2)) - text = m.group(3) - - return sequence - - -def sequence_to_text(sequence): - '''Converts a sequence of IDs back to a string''' - result = '' - for symbol_id in sequence: - if symbol_id in _id_to_symbol: - s = _id_to_symbol[symbol_id] - # Enclose ARPAbet back in curly braces: - if len(s) > 1 and s[0] == '@': - s = '{%s}' % s[1:] - result += s - return result.replace('}{', ' ') - - -def _clean_text(text, cleaner_names): - for name in cleaner_names: - cleaner = getattr(cleaners, name) - if not cleaner: - raise Exception('Unknown cleaner: %s' % name) - text = cleaner(text) - return text - - -def _symbols_to_sequence(symbols): - return [_symbol_to_id[s] for s in symbols if _should_keep_symbol(s)] - - -def _arpabet_to_sequence(text): - return _symbols_to_sequence(['@' + s for s in text.split()]) - - -def _should_keep_symbol(s): - return s in _symbol_to_id and s is not '_' and s is not '~' diff --git a/demo/Tacotron2/tacotron2/text/cleaners.py b/demo/Tacotron2/tacotron2/text/cleaners.py deleted file mode 100644 index 4cbcb015..00000000 --- a/demo/Tacotron2/tacotron2/text/cleaners.py +++ /dev/null @@ -1,106 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -""" from https://github.com/keithito/tacotron """ - -''' -Cleaners are transformations that run over the input text at both training and eval time. - -Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners" -hyperparameter. Some cleaners are English-specific. You'll typically want to use: - 1. "english_cleaners" for English text - 2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using - the Unidecode library (https://pypi.python.org/pypi/Unidecode) - 3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update - the symbols in symbols.py to match your data). -''' - -import re -from unidecode import unidecode -from .numbers import normalize_numbers - - -# Regular expression matching whitespace: -_whitespace_re = re.compile(r'\s+') - -# List of (regular expression, replacement) pairs for abbreviations: -_abbreviations = [(re.compile('\\b%s\\.' 
% x[0], re.IGNORECASE), x[1]) for x in [ - ('mrs', 'misess'), - ('mr', 'mister'), - ('dr', 'doctor'), - ('st', 'saint'), - ('co', 'company'), - ('jr', 'junior'), - ('maj', 'major'), - ('gen', 'general'), - ('drs', 'doctors'), - ('rev', 'reverend'), - ('lt', 'lieutenant'), - ('hon', 'honorable'), - ('sgt', 'sergeant'), - ('capt', 'captain'), - ('esq', 'esquire'), - ('ltd', 'limited'), - ('col', 'colonel'), - ('ft', 'fort'), -]] - - -def expand_abbreviations(text): - for regex, replacement in _abbreviations: - text = re.sub(regex, replacement, text) - return text - - -def expand_numbers(text): - return normalize_numbers(text) - - -def lowercase(text): - return text.lower() - - -def collapse_whitespace(text): - return re.sub(_whitespace_re, ' ', text) - - -def convert_to_ascii(text): - return unidecode(text) - - -def basic_cleaners(text): - '''Basic pipeline that lowercases and collapses whitespace without transliteration.''' - text = lowercase(text) - text = collapse_whitespace(text) - return text - - -def transliteration_cleaners(text): - '''Pipeline for non-English text that transliterates to ASCII.''' - text = convert_to_ascii(text) - text = lowercase(text) - text = collapse_whitespace(text) - return text - - -def english_cleaners(text): - '''Pipeline for English text, including number and abbreviation expansion.''' - text = convert_to_ascii(text) - text = lowercase(text) - text = expand_numbers(text) - text = expand_abbreviations(text) - text = collapse_whitespace(text) - return text diff --git a/demo/Tacotron2/tacotron2/text/cmudict.py b/demo/Tacotron2/tacotron2/text/cmudict.py deleted file mode 100644 index b359b235..00000000 --- a/demo/Tacotron2/tacotron2/text/cmudict.py +++ /dev/null @@ -1,81 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -""" from https://github.com/keithito/tacotron """ - -import re - - -valid_symbols = [ - 'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1', 'AH2', - 'AO', 'AO0', 'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0', 'AY1', 'AY2', - 'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0', 'ER1', 'ER2', 'EY', - 'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0', 'IH1', 'IH2', 'IY', 'IY0', 'IY1', - 'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 'OW0', 'OW1', 'OW2', 'OY', 'OY0', - 'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW', - 'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH' -] - -_valid_symbol_set = set(valid_symbols) - - -class CMUDict: - '''Thin wrapper around CMUDict data. 
http://www.speech.cs.cmu.edu/cgi-bin/cmudict''' - def __init__(self, file_or_path, keep_ambiguous=True): - if isinstance(file_or_path, str): - with open(file_or_path, encoding='latin-1') as f: - entries = _parse_cmudict(f) - else: - entries = _parse_cmudict(file_or_path) - if not keep_ambiguous: - entries = {word: pron for word, pron in entries.items() if len(pron) == 1} - self._entries = entries - - - def __len__(self): - return len(self._entries) - - - def lookup(self, word): - '''Returns list of ARPAbet pronunciations of the given word.''' - return self._entries.get(word.upper()) - - - -_alt_re = re.compile(r'\([0-9]+\)') - - -def _parse_cmudict(file): - cmudict = {} - for line in file: - if len(line) and (line[0] >= 'A' and line[0] <= 'Z' or line[0] == "'"): - parts = line.split(' ') - word = re.sub(_alt_re, '', parts[0]) - pronunciation = _get_pronunciation(parts[1]) - if pronunciation: - if word in cmudict: - cmudict[word].append(pronunciation) - else: - cmudict[word] = [pronunciation] - return cmudict - - -def _get_pronunciation(s): - parts = s.strip().split(' ') - for part in parts: - if part not in _valid_symbol_set: - return None - return ' '.join(parts) diff --git a/demo/Tacotron2/tacotron2/text/numbers.py b/demo/Tacotron2/tacotron2/text/numbers.py deleted file mode 100644 index 43df588d..00000000 --- a/demo/Tacotron2/tacotron2/text/numbers.py +++ /dev/null @@ -1,87 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -""" from https://github.com/keithito/tacotron """ - -import inflect -import re - - -_inflect = inflect.engine() -_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])') -_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)') -_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)') -_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)') -_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)') -_number_re = re.compile(r'[0-9]+') - - -def _remove_commas(m): - return m.group(1).replace(',', '') - - -def _expand_decimal_point(m): - return m.group(1).replace('.', ' point ') - - -def _expand_dollars(m): - match = m.group(1) - parts = match.split('.') - if len(parts) > 2: - return match + ' dollars' # Unexpected format - dollars = int(parts[0]) if parts[0] else 0 - cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0 - if dollars and cents: - dollar_unit = 'dollar' if dollars == 1 else 'dollars' - cent_unit = 'cent' if cents == 1 else 'cents' - return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit) - elif dollars: - dollar_unit = 'dollar' if dollars == 1 else 'dollars' - return '%s %s' % (dollars, dollar_unit) - elif cents: - cent_unit = 'cent' if cents == 1 else 'cents' - return '%s %s' % (cents, cent_unit) - else: - return 'zero dollars' - - -def _expand_ordinal(m): - return _inflect.number_to_words(m.group(0)) - - -def _expand_number(m): - num = int(m.group(0)) - if num > 1000 and num < 3000: - if num == 2000: - return 'two thousand' - elif num > 2000 and num < 2010: - return 'two thousand ' + _inflect.number_to_words(num % 100) - elif num % 100 == 0: - return 
_inflect.number_to_words(num // 100) + ' hundred' - else: - return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ') - else: - return _inflect.number_to_words(num, andword='') - - -def normalize_numbers(text): - text = re.sub(_comma_number_re, _remove_commas, text) - text = re.sub(_pounds_re, r'\1 pounds', text) - text = re.sub(_dollars_re, _expand_dollars, text) - text = re.sub(_decimal_number_re, _expand_decimal_point, text) - text = re.sub(_ordinal_re, _expand_ordinal, text) - text = re.sub(_number_re, _expand_number, text) - return text diff --git a/demo/Tacotron2/tacotron2/text/symbols.py b/demo/Tacotron2/tacotron2/text/symbols.py deleted file mode 100644 index 604626ec..00000000 --- a/demo/Tacotron2/tacotron2/text/symbols.py +++ /dev/null @@ -1,34 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -""" from https://github.com/keithito/tacotron """ - -''' -Defines the set of symbols used in text input to the model. - -The default is a set of ASCII characters that works well for English or text that has been run through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details. ''' -from tacotron2.text import cmudict - -_pad = '_' -_punctuation = '!\'(),.:;? 
' -_special = '-' -_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' - -# Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters): -_arpabet = ['@' + s for s in cmudict.valid_symbols] - -# Export all symbols: -symbols = [_pad] + list(_special) + list(_punctuation) + list(_letters) + _arpabet diff --git a/demo/Tacotron2/tensorrt/convert_onnx2trt.py b/demo/Tacotron2/tensorrt/convert_onnx2trt.py deleted file mode 100644 index dd24c801..00000000 --- a/demo/Tacotron2/tensorrt/convert_onnx2trt.py +++ /dev/null @@ -1,168 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import sys -import tensorrt as trt -from os.path import join - -from trt_utils import build_engine, parse_dynamic_size - -def parse_args(parser): - """ - Parse commandline arguments. 
- """ - parser.add_argument('-o', '--output', required=True, - help='output folder to save audio (file per phrase)') - parser.add_argument('--encoder', type=str, default="", - help='full path to the Encoder ONNX') - parser.add_argument('--decoder', type=str, default="", - help='full path to the Decoder or DecoderIter ONNX.') - parser.add_argument('--postnet', type=str, default="", - help='full path to the Postnet ONNX') - parser.add_argument('--waveglow', type=str, default="", - help='full path to the WaveGlow ONNX') - parser.add_argument('--encoder_out', type=str, - help='Filename of the exported encoder engine') - parser.add_argument('--decoder_out', type=str, - help='Filename of the exported decoder engine') - parser.add_argument('--postnet_out', type=str, - help='Filename of the exported postnet engine') - parser.add_argument('--waveglow_out', type=str, - help='Filename of the exported waveglow engine') - parser.add_argument('--fp16', action='store_true', - help='inference with FP16') - parser.add_argument('-bs', '--batch-size', type=str, default="1", - help='One or three comma separated integers specifying the batch size. Specify "min,opt,max" for dynamic shape') - parser.add_argument('--mel-size', type=str, default="32,768,1664", - help='One or three comma separated integers specifying the mels size for waveglow.') - parser.add_argument('--z-size', type=str, default="1024,24576,53248", - help='One or three comma separated integers specifying the z size for waveglow.') - parser.add_argument('--loop', dest='loop', action='store_true', - help='Includes the outer decoder loop in the ONNX model. Enabled by default and only supported on TensorRT 8.0 or later.') - parser.add_argument('--no-loop', dest='loop', action='store_false', - help='Excludes outer decoder loop from decoder ONNX model. 
Default behavior and necessary for TensorRT 7.2 or earlier.') - parser.add_argument("-tcf", "--timing-cache-file", default=None, type=str, - help="Path to tensorrt build timeing cache file, only available for tensorrt 8.0 and later. The cache file is assumed to be used exclusively. It's the users' responsibility to create file lock to prevent accessing conflict.", - required=False) - parser.set_defaults(loop=int(trt.__version__[0]) >= 8) - return parser - - -def main(): - - parser = argparse.ArgumentParser( - description='Export from ONNX to TensorRT for Tacotron 2 and WaveGlow') - parser = parse_args(parser) - args = parser.parse_args() - - precision = "fp16" if args.fp16 else "fp32" - encoder_path = join(args.output, args.encoder_out if args.encoder_out else f"encoder_{precision}.engine") - decoder_path = join(args.output, args.decoder_out if args.decoder_out else f"decoder_with_outer_loop_{precision}.engine" if args.loop else f"decoder_iter_{precision}.engine") - postnet_path = join(args.output, args.postnet_out if args.postnet_out else f"postnet_{precision}.engine") - waveglow_path = join(args.output, args.waveglow_out if args.waveglow_out else f"waveglow_{precision}.engine") - - bs_min, bs_opt, bs_max = parse_dynamic_size(args.batch_size) - mel_min, mel_opt, mel_max = parse_dynamic_size(args.mel_size) - z_min, z_opt, z_max = parse_dynamic_size(args.z_size) - - # Encoder - shapes=[{"name": "sequences", "min": (bs_min,4), "opt": (bs_opt,128), "max": (bs_max,256)}, - {"name": "sequence_lengths", "min": (bs_min,), "opt": (bs_opt,), "max": (bs_max,)}] - if args.encoder != "": - print("Building Encoder ...") - encoder_engine = build_engine(args.encoder, shapes=shapes, fp16=args.fp16, timing_cache=args.timing_cache_file) - if encoder_engine is not None: - with open(encoder_path, 'wb') as f: - f.write(encoder_engine) - else: - print("Failed to build engine from", args.encoder) - sys.exit(1) - - if args.loop: - # Decoder - shapes=[{"name": "decoder_input_0", "min": 
(bs_min,80), "opt": (bs_opt,80), "max": (bs_max,80)}, - {"name": "attention_hidden_0", "min": (bs_min,1024), "opt": (bs_opt,1024), "max": (bs_max,1024)}, - {"name": "attention_cell_0", "min": (bs_min,1024), "opt": (bs_opt,1024), "max": (bs_max,1024)}, - {"name": "decoder_hidden_0", "min": (bs_min,1024), "opt": (bs_opt,1024), "max": (bs_max,1024)}, - {"name": "decoder_cell_0", "min": (bs_min,1024), "opt": (bs_opt,1024), "max": (bs_max,1024)}, - {"name": "attention_weights_0", "min": (bs_min,4), "opt": (bs_opt,128), "max": (bs_max,256)}, - {"name": "attention_weights_cum_0", "min": (bs_min,4), "opt": (bs_opt,128), "max": (bs_max,256)}, - {"name": "attention_context_0", "min": (bs_min,512), "opt": (bs_opt,512), "max": (bs_max,512)}, - {"name": "memory", "min": (bs_min,4,512), "opt": (bs_opt,128,512), "max": (bs_max,256,512)}, - {"name": "processed_memory", "min": (bs_min,4,128), "opt": (bs_opt,128,128), "max": (bs_max,256,128)}, - {"name": "mask", "min": (bs_min,4), "opt": (bs_opt,128), "max": (bs_max,256)}] - if args.decoder != "": - print("Building Decoder with loop...") - decoder_engine = build_engine(args.decoder, shapes=shapes, fp16=args.fp16, timing_cache=args.timing_cache_file) - if decoder_engine is not None: - with open(decoder_path, 'wb') as f: - f.write(decoder_engine) - else: - print("Failed to build engine from", args.decoder) - sys.exit(1) - else: - # DecoderIter - shapes=[{"name": "decoder_input", "min": (bs_min,80), "opt": (bs_opt,80), "max": (bs_max,80)}, - {"name": "attention_hidden", "min": (bs_min,1024), "opt": (bs_opt,1024), "max": (bs_max,1024)}, - {"name": "attention_cell", "min": (bs_min,1024), "opt": (bs_opt,1024), "max": (bs_max,1024)}, - {"name": "decoder_hidden", "min": (bs_min,1024), "opt": (bs_opt,1024), "max": (bs_max,1024)}, - {"name": "decoder_cell", "min": (bs_min,1024), "opt": (bs_opt,1024), "max": (bs_max,1024)}, - {"name": "attention_weights", "min": (bs_min,4), "opt": (bs_opt,128), "max": (bs_max,256)}, - {"name": 
"attention_weights_cum", "min": (bs_min,4), "opt": (bs_opt,128), "max": (bs_max,256)}, - {"name": "attention_context", "min": (bs_min,512), "opt": (bs_opt,512), "max": (bs_max,512)}, - {"name": "memory", "min": (bs_min,4,512), "opt": (bs_opt,128,512), "max": (bs_max,256,512)}, - {"name": "processed_memory", "min": (bs_min,4,128), "opt": (bs_opt,128,128), "max": (bs_max,256,128)}, - {"name": "mask", "min": (bs_min,4), "opt": (bs_opt,128), "max": (bs_max,256)}] - if args.decoder != "": - print("Building Decoder ...") - decoder_iter_engine = build_engine(args.decoder, shapes=shapes, fp16=args.fp16, timing_cache=args.timing_cache_file) - if decoder_iter_engine is not None: - with open(decoder_path, 'wb') as f: - f.write(decoder_iter_engine) - else: - print("Failed to build engine from", args.decoder) - sys.exit(1) - - # Postnet - shapes=[{"name": "mel_outputs", "min": (bs_min,80,32), "opt": (bs_opt,80,768), "max": (bs_max,80,1664)}] - if args.postnet != "": - print("Building Postnet ...") - postnet_engine = build_engine(args.postnet, shapes=shapes, fp16=args.fp16, timing_cache=args.timing_cache_file) - if postnet_engine is not None: - with open(postnet_path, 'wb') as f: - f.write(postnet_engine) - else: - print("Failed to build engine from", args.postnet) - sys.exit(1) - - # WaveGlow - shapes=[{"name": "mel", "min": (bs_min,80,mel_min,1), "opt": (bs_opt,80,mel_opt,1), "max": (bs_max,80,mel_max,1)}, - {"name": "z", "min": (bs_min,8,z_min,1), "opt": (bs_opt,8,z_opt,1), "max": (bs_max,8,z_max,1)}] - if args.waveglow != "": - print("Building WaveGlow ...") - waveglow_engine = build_engine(args.waveglow, shapes=shapes, fp16=args.fp16, timing_cache=args.timing_cache_file) - if waveglow_engine is not None: - with open(waveglow_path, 'wb') as f: - f.write(waveglow_engine) - else: - print("Failed to build engine from", args.waveglow) - sys.exit(1) - - -if __name__ == '__main__': - main() diff --git a/demo/Tacotron2/tensorrt/convert_tacotron22onnx.py 
b/demo/Tacotron2/tensorrt/convert_tacotron22onnx.py deleted file mode 100644 index 361a2221..00000000 --- a/demo/Tacotron2/tensorrt/convert_tacotron22onnx.py +++ /dev/null @@ -1,418 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import tensorrt -import torch -from torch import nn -from torch.nn import functional as F -import argparse - -import sys -import os -from pathlib import Path -sys.path.append(str(Path(__file__).parents[1])) - -import models -from inference import checkpoint_from_distributed, unwrap_distributed, load_and_setup_model, prepare_input_sequence -from common.utils import to_gpu, get_mask_from_lengths - -torch.backends.cudnn.enabled = True -def parse_args(parser): - """ - Parse commandline arguments. 
- """ - parser.add_argument('--tacotron2', type=str, required=True, - help='Full path to the Tacotron2 model checkpoint file') - parser.add_argument('-o', '--output', type=str, required=True, - help='Directory for the exported Tacotron2 ONNX models') - parser.add_argument('-e', '--encoder', type=str, required=False, default="encoder.onnx", - help='Filename for exported encoder ONNX model') - parser.add_argument('-d', '--decoder', type=str, required=False, default="decoder_iter.onnx", - help='Filename for exported decoder ONNX model') - parser.add_argument('-p', '--postnet', type=str, required=False, default="postnet.onnx", - help='Filename for exported postnet ONNX model') - parser.add_argument('--fp16', action='store_true', - help='Export with half precision to ONNX') - parser.add_argument('--loop', dest='loop', action='store_true', - help='Includes the outer decoder loop in the ONNX model. Enabled by default and only supported on TensorRT 8.0 or later.') - parser.add_argument('--no-loop', dest='loop', action='store_false', - help='Excludes outer decoder loop from decoder ONNX model. 
Default behavior and necessary for TensorRT 7.2 or earlier.') - parser.set_defaults(loop=int(tensorrt.__version__[0]) >= 8) - - return parser - - -def encoder_infer(self, x, input_lengths): - device = x.device - for conv in self.convolutions: - x = F.dropout(F.relu(conv(x.to(device))), 0.5, False) - - x = x.transpose(1, 2) - - x = nn.utils.rnn.pack_padded_sequence( - x, input_lengths, batch_first=True) - - outputs, _ = self.lstm(x) - - outputs, _ = nn.utils.rnn.pad_packed_sequence( - outputs, batch_first=True) - - lens = input_lengths*2 - - return outputs, lens - - -class Encoder(torch.nn.Module): - def __init__(self, tacotron2): - super(Encoder, self).__init__() - self.tacotron2 = tacotron2 - self.tacotron2.encoder.lstm.flatten_parameters() - self.infer = encoder_infer - - def forward(self, sequence, sequence_lengths): - embedded_inputs = self.tacotron2.embedding(sequence).transpose(1, 2) - memory, lens = self.infer(self.tacotron2.encoder, embedded_inputs, sequence_lengths) - processed_memory = self.tacotron2.decoder.attention_layer.memory_layer(memory) - return memory, processed_memory, lens - -class Postnet(torch.nn.Module): - def __init__(self, tacotron2): - super(Postnet, self).__init__() - self.tacotron2 = tacotron2 - - def forward(self, mel_outputs): - mel_outputs_postnet = self.tacotron2.postnet(mel_outputs) - return mel_outputs + mel_outputs_postnet - -def lstmcell2lstm_params(lstm_mod, lstmcell_mod): - lstm_mod.weight_ih_l0 = torch.nn.Parameter(lstmcell_mod.weight_ih) - lstm_mod.weight_hh_l0 = torch.nn.Parameter(lstmcell_mod.weight_hh) - lstm_mod.bias_ih_l0 = torch.nn.Parameter(lstmcell_mod.bias_ih) - lstm_mod.bias_hh_l0 = torch.nn.Parameter(lstmcell_mod.bias_hh) - - -def prenet_infer(self, x): - x1 = x[:] - for linear in self.layers: - x1 = F.relu(linear(x1)) - x0 = x1[0].unsqueeze(0) - mask = torch.le(torch.rand(256, device='cuda').to(x.dtype), 0.5).to(x.dtype) - mask = mask.expand(x1.size(0), x1.size(1)) - x1 = x1*mask*2.0 - - return x1 - -class 
DecoderIter(torch.nn.Module): - def __init__(self, tacotron2): - super(DecoderIter, self).__init__() - - self.tacotron2 = tacotron2 - dec = tacotron2.decoder - - self.p_attention_dropout = dec.p_attention_dropout - self.p_decoder_dropout = dec.p_decoder_dropout - self.prenet = dec.prenet - - self.prenet.infer = prenet_infer - - self.attention_rnn = nn.LSTM(dec.prenet_dim + dec.encoder_embedding_dim, - dec.attention_rnn_dim, 1) - lstmcell2lstm_params(self.attention_rnn, dec.attention_rnn) - self.attention_rnn.flatten_parameters() - - self.attention_layer = dec.attention_layer - - self.decoder_rnn = nn.LSTM(dec.attention_rnn_dim + dec.encoder_embedding_dim, - dec.decoder_rnn_dim, 1) - lstmcell2lstm_params(self.decoder_rnn, dec.decoder_rnn) - self.decoder_rnn.flatten_parameters() - - self.linear_projection = dec.linear_projection - self.gate_layer = dec.gate_layer - - - def decode(self, decoder_input, in_attention_hidden, in_attention_cell, - in_decoder_hidden, in_decoder_cell, in_attention_weights, - in_attention_weights_cum, in_attention_context, memory, - processed_memory, mask): - - cell_input = torch.cat((decoder_input, in_attention_context), -1) - - _, (out_attention_hidden, out_attention_cell) = self.attention_rnn( - cell_input.unsqueeze(0), (in_attention_hidden.unsqueeze(0), - in_attention_cell.unsqueeze(0))) - out_attention_hidden = out_attention_hidden.squeeze(0) - out_attention_cell = out_attention_cell.squeeze(0) - - out_attention_hidden = F.dropout( - out_attention_hidden, self.p_attention_dropout, False) - - attention_weights_cat = torch.cat( - (in_attention_weights.unsqueeze(1), - in_attention_weights_cum.unsqueeze(1)), dim=1) - out_attention_context, out_attention_weights = self.attention_layer( - out_attention_hidden, memory, processed_memory, - attention_weights_cat, mask) - - out_attention_weights_cum = in_attention_weights_cum + out_attention_weights - decoder_input_tmp = torch.cat( - (out_attention_hidden, out_attention_context), -1) - - _, 
(out_decoder_hidden, out_decoder_cell) = self.decoder_rnn( - decoder_input_tmp.unsqueeze(0), (in_decoder_hidden.unsqueeze(0), - in_decoder_cell.unsqueeze(0))) - out_decoder_hidden = out_decoder_hidden.squeeze(0) - out_decoder_cell = out_decoder_cell.squeeze(0) - - out_decoder_hidden = F.dropout( - out_decoder_hidden, self.p_decoder_dropout, False) - - decoder_hidden_attention_context = torch.cat( - (out_decoder_hidden, out_attention_context), 1) - - decoder_output = self.linear_projection( - decoder_hidden_attention_context) - - gate_prediction = self.gate_layer(decoder_hidden_attention_context) - - return (decoder_output, gate_prediction, out_attention_hidden, - out_attention_cell, out_decoder_hidden, out_decoder_cell, - out_attention_weights, out_attention_weights_cum, out_attention_context) - - # @torch.jit.script - def forward(self, - decoder_input, - attention_hidden, - attention_cell, - decoder_hidden, - decoder_cell, - attention_weights, - attention_weights_cum, - attention_context, - memory, - processed_memory, - mask): - decoder_input1 = self.prenet.infer(self.prenet, decoder_input) - outputs = self.decode(decoder_input1, - attention_hidden, - attention_cell, - decoder_hidden, - decoder_cell, - attention_weights, - attention_weights_cum, - attention_context, - memory, - processed_memory, - mask) - return outputs - - -def test_inference(encoder, decoder_iter, postnet): - - encoder.eval() - decoder_iter.eval() - postnet.eval() - - sys.path.append('./tensorrt') - from inference_trt import init_decoder_inputs - - texts = ["Hello World, good day."] - sequences, sequence_lengths = prepare_input_sequence(texts) - - measurements = {} - - print("Running Tacotron2 Encoder") - with torch.no_grad(): - memory, processed_memory, lens = encoder(sequences, sequence_lengths) - - print("Running Tacotron2 Decoder") - device = memory.device - dtype = memory.dtype - mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32, device = device) - not_finished = 
torch.ones([memory.size(0)], dtype=torch.int32, device = device) - mel_outputs, gate_outputs, alignments = (torch.zeros(1), torch.zeros(1), torch.zeros(1)) - gate_threshold = 0.6 - max_decoder_steps = 1000 - first_iter = True - - (decoder_input, attention_hidden, attention_cell, decoder_hidden, - decoder_cell, attention_weights, attention_weights_cum, - attention_context, memory, processed_memory, - mask) = init_decoder_inputs(memory, processed_memory, sequence_lengths) - - while True: - with torch.no_grad(): - (mel_output, gate_output, - attention_hidden, attention_cell, - decoder_hidden, decoder_cell, - attention_weights, attention_weights_cum, - attention_context) = decoder_iter(decoder_input, attention_hidden, attention_cell, decoder_hidden, - decoder_cell, attention_weights, attention_weights_cum, - attention_context, memory, processed_memory, mask) - - if first_iter: - mel_outputs = torch.unsqueeze(mel_output, 2) - gate_outputs = torch.unsqueeze(gate_output, 2) - alignments = torch.unsqueeze(attention_weights, 2) - first_iter = False - else: - mel_outputs = torch.cat((mel_outputs, torch.unsqueeze(mel_output, 2)), 2) - gate_outputs = torch.cat((gate_outputs, torch.unsqueeze(gate_output, 2)), 2) - alignments = torch.cat((alignments, torch.unsqueeze(attention_weights, 2)), 2) - - dec = torch.le(torch.sigmoid(gate_output), gate_threshold).to(torch.int32).squeeze(1) - not_finished = not_finished*dec - mel_lengths += not_finished - - if torch.sum(not_finished) == 0: - print("Stopping after ",mel_outputs.size(2)," decoder steps") - break - if mel_outputs.size(2) == max_decoder_steps: - print("Warning! 
Reached max decoder steps") - break - - decoder_input = mel_output - - - print("Running Tacotron2 PostNet") - with torch.no_grad(): - mel_outputs_postnet = postnet(mel_outputs) - - return mel_outputs_postnet - -def main(): - - parser = argparse.ArgumentParser( - description='PyTorch Tacotron 2 export to TRT') - parser = parse_args(parser) - args, _ = parser.parse_known_args() - - args.encoder = os.path.join(args.output, args.encoder) - args.decoder = os.path.join(args.output, args.decoder) - args.postnet = os.path.join(args.output, args.postnet) - - tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2, - fp16_run=args.fp16, cpu_run=False) - - opset_version = 10 - - sequences = torch.randint(low=0, high=148, size=(1,50), - dtype=torch.long).cuda() - sequence_lengths = torch.IntTensor([sequences.size(1)]) - dummy_input = (sequences, sequence_lengths) - - encoder = Encoder(tacotron2) - encoder.eval() - with torch.no_grad(): - encoder(*dummy_input) - - torch.onnx.export(encoder, dummy_input, args.encoder, - opset_version=opset_version, - do_constant_folding=True, - input_names=["sequences", "sequence_lengths"], - output_names=["memory", "processed_memory", "lens"], - dynamic_axes={"sequences": {0: "batch_size", 1: "text_seq"}, - "sequence_lengths": {0: "batch_size"}, - "memory": {0: "batch_size", 1: "mem_seq"}, - "processed_memory": {0: "batch_size", 1: "mem_seq"}, - "lens": {0: "batch_size"} - }) - - decoder_iter = DecoderIter(tacotron2) - memory = torch.randn((1,sequence_lengths[0],512)).cuda() #encoder_outputs - if args.fp16: - memory = memory.half() - memory_lengths = sequence_lengths.cuda() - # initialize decoder states for dummy_input - decoder_input = tacotron2.decoder.get_go_frame(memory) - mask = get_mask_from_lengths(memory_lengths) - (attention_hidden, - attention_cell, - decoder_hidden, - decoder_cell, - attention_weights, - attention_weights_cum, - attention_context, - processed_memory) = tacotron2.decoder.initialize_decoder_states(memory) 
- dummy_input = (decoder_input, - attention_hidden, - attention_cell, - decoder_hidden, - decoder_cell, - attention_weights, - attention_weights_cum, - attention_context, - memory, - processed_memory, - mask) - - decoder_iter = DecoderIter(tacotron2) - decoder_iter.eval() - with torch.no_grad(): - decoder_iter(*dummy_input) - - torch.onnx.export(decoder_iter, dummy_input, args.decoder, - opset_version=opset_version, - do_constant_folding=True, - input_names=["decoder_input", - "attention_hidden", - "attention_cell", - "decoder_hidden", - "decoder_cell", - "attention_weights", - "attention_weights_cum", - "attention_context", - "memory", - "processed_memory", - "mask"], - output_names=["decoder_output", - "gate_prediction", - "out_attention_hidden", - "out_attention_cell", - "out_decoder_hidden", - "out_decoder_cell", - "out_attention_weights", - "out_attention_weights_cum", - "out_attention_context"], - dynamic_axes={"attention_weights" : {0: "batch_size", 1: "seq_len"}, - "attention_weights_cum" : {0: "batch_size", 1: "seq_len"}, - "memory" : {0: "batch_size", 1: "seq_len"}, - "processed_memory" : {0: "batch_size", 1: "seq_len"}, - "mask" : {0: "batch_size", 1: "seq_len"}, - "out_attention_weights" : {0: "batch_size", 1: "seq_len"}, - "out_attention_weights_cum" : {0: "batch_size", 1: "seq_len"} - }) - - if args.loop: - from generate_decoder import insert_decoder_loop - decoder_dir = os.path.dirname(os.path.abspath(args.decoder)) - insert_decoder_loop(args.decoder, decoder_dir, os.path.basename(args.decoder).replace("_iter", ""), args.fp16) - - postnet = Postnet(tacotron2) - dummy_input = torch.randn((1,80,620)).cuda() - if args.fp16: - dummy_input = dummy_input.half() - torch.onnx.export(postnet, dummy_input, args.postnet, - opset_version=opset_version, - do_constant_folding=True, - input_names=["mel_outputs"], - output_names=["mel_outputs_postnet"], - dynamic_axes={"mel_outputs": {0: "batch_size", 2: "mel_seq"}, - "mel_outputs_postnet": {0: "batch_size", 2: 
"mel_seq"}}) - -if __name__ == '__main__': - main() diff --git a/demo/Tacotron2/tensorrt/convert_waveglow2onnx.py b/demo/Tacotron2/tensorrt/convert_waveglow2onnx.py deleted file mode 100644 index 4b9aecbc..00000000 --- a/demo/Tacotron2/tensorrt/convert_waveglow2onnx.py +++ /dev/null @@ -1,167 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import torch -import argparse -import os -import sys -from pathlib import Path -sys.path.append(str(Path(__file__).parents[1])) - -from common.utils import ParseFromConfigFile -from inference import load_and_setup_model - -def convert_convinv_1d_to_2d(convinv): - """ - Takes an invertible 1x1 1-d convolution and returns a 2-d convolution that does - the inverse - """ - conv2d = torch.nn.Conv2d(convinv.W_inverse.size(1), - convinv.W_inverse.size(0), - 1, bias=False) - conv2d.weight.data[:,:,:,0] = convinv.W_inverse.data - return conv2d - - -def convert_conv_1d_to_2d(conv1d): - conv2d = torch.nn.Conv2d(conv1d.weight.size(1), - conv1d.weight.size(0), - (conv1d.weight.size(2), 1), - stride=(conv1d.stride[0], 1), - dilation=(conv1d.dilation[0], 1), - padding=(conv1d.padding[0], 0)) - conv2d.weight.data[:,:,:,0] = conv1d.weight.data - conv2d.bias.data = conv1d.bias.data - return conv2d - - -def convert_WN_1d_to_2d_(WN): - """ - Modifies the WaveNet like affine coupling layer in-place to use 2-d convolutions - """ - WN.start = convert_conv_1d_to_2d(WN.start) - WN.end = convert_conv_1d_to_2d(WN.end) - - for i in range(len(WN.in_layers)): - WN.in_layers[i] = convert_conv_1d_to_2d(WN.in_layers[i]) - - for i in range(len(WN.res_skip_layers)): - WN.res_skip_layers[i] = convert_conv_1d_to_2d(WN.res_skip_layers[i]) - - for i in range(len(WN.res_skip_layers)): - WN.cond_layers[i] = convert_conv_1d_to_2d(WN.cond_layers[i]) - - -def convert_1d_to_2d_(glow): - """ - Caffe2 and TensorRT don't seem to support 1-d convolutions or properly - convert ONNX exports with 1d convolutions to 2d convolutions yet, so we - do the conversion to 2-d convolutions before ONNX export - """ - # Convert upsample to 2d - upsample = torch.nn.ConvTranspose2d(glow.upsample.weight.size(0), - glow.upsample.weight.size(1), - (glow.upsample.weight.size(2), 1), - stride=(glow.upsample.stride[0], 1)) - upsample.weight.data[:,:,:,0] = glow.upsample.weight.data - upsample.bias.data = glow.upsample.bias.data 
- glow.upsample = upsample.cuda() - - # Convert WN to 2d - for WN in glow.WN: - convert_WN_1d_to_2d_(WN) - - # Convert invertible conv to 2d - for i in range(len(glow.convinv)): - glow.convinv[i] = convert_convinv_1d_to_2d(glow.convinv[i]) - - glow.cuda() - -def parse_args(parser): - """ - Parse commandline arguments. - """ - parser.add_argument('--waveglow', type=str, required=True, - help='full path to the WaveGlow model checkpoint file') - parser.add_argument('-o', '--output', type=str, required=True, - help='Directory or file name for the exported WaveGlow ONNX model') - parser.add_argument('--fp16', action='store_true', - help='inference with AMP') - parser.add_argument('-s', '--sigma-infer', default=0.6, type=float) - - parser.add_argument('--config-file', action=ParseFromConfigFile, - type=str, help='Path to configuration file') - - return parser - - -def export_onnx(parser, args): - - waveglow = load_and_setup_model('WaveGlow', parser, args.waveglow, - fp16_run=args.fp16, cpu_run=False, - forward_is_infer=False) - - # 80 mel channels, 620 mel spectrograms ~ 7 seconds of speech - mel = torch.randn(1, 80, 620).cuda() - stride = 256 # value from waveglow upsample - n_group = 8 - z_size2 = (mel.size(2)*stride)//n_group - z = torch.randn(1, n_group, z_size2, 1).cuda() - - if args.fp16: - mel = mel.half() - z = z.half() - with torch.no_grad(): - # run inference to force calculation of inverses - waveglow.infer(mel, sigma=args.sigma_infer) - - convert_1d_to_2d_(waveglow) - mel = mel.unsqueeze(3) - - # export to ONNX - if args.fp16: - waveglow = waveglow.half() - - waveglow.forward = waveglow.infer_onnx - - opset_version = 11 - - if os.path.isdir(args.output): - output_path = os.path.join(args.output, "waveglow.onnx") - else: - output_path = args.output - - torch.onnx.export(waveglow, (mel, z), output_path, - opset_version=opset_version, - do_constant_folding=True, - input_names=["mel", "z"], - output_names=["audio"], - dynamic_axes={"mel": {0: "batch_size", 2: 
"mel_seq"}, - "z": {0: "batch_size", 2: "z_seq"}, - "audio": {0: "batch_size", 1: "audio_seq"}}) - - -def main(): - parser = argparse.ArgumentParser( - description='PyTorch Tacotron 2 Inference') - parser = parse_args(parser) - args, _ = parser.parse_known_args() - - export_onnx(parser, args) - -if __name__ == '__main__': - main() diff --git a/demo/Tacotron2/tensorrt/generate_decoder.py b/demo/Tacotron2/tensorrt/generate_decoder.py deleted file mode 100644 index 62f8b04e..00000000 --- a/demo/Tacotron2/tensorrt/generate_decoder.py +++ /dev/null @@ -1,212 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import onnx_graphsurgeon as gs -import onnx -import sys -import os -import numpy as np -import argparse - -def insert_decoder_loop(decoder_iter_onnx_path, output_dir, decoder_out_name, fp16): - float_prec = np.float16 if fp16 else np.float32 - - # Modify loop body so that it has 2+N inputs: (iteration_num, condition, loop carried dependencies...) - # and 1+N+K outputs: (condition, loop carried dependencies..., scan_outputs...) 
- - # In this case, the loop carried dependencies include the following IN ORDER - # - decoder_output/decoder_input - # - attention_hidden - # - attention_cell - # - decoder_hidden - # - decoder_cell - # - attention_weights - # - attention_weights_cum - # - attention_context - # - not_finished (bool tensor, initialized to all True) - # - mel_lengths - - # The following are NOT loop carried dependencies (they remain constant through the loop), and must be moved to be inputs outside of the loop body - # - memory - # - processed_memory - # - mask - - # The scan outputs are - # - mel_outputs (which scans across decoder_output) - # - gate_outputs (scans across gate_prediction) - # - alignments (scans across attention_weights) - - - loop_body = gs.import_onnx(onnx.load(decoder_iter_onnx_path)) - loop_tensors = loop_body.tensors() - - iteration_num = gs.Variable("iteration_num", dtype=np.int64, shape=()) - cond_in = gs.Variable("cond_in", dtype=bool, shape=()) - cond_out = gs.Variable("cond_out", dtype=bool, shape=()) - not_finished_in = gs.Variable("not_finished_in", shape=('batch_size', 1), dtype=bool) - not_finished_out = gs.Variable("not_finished_out", shape=('batch_size', 1), dtype=bool) - mel_lengths_in = gs.Variable("mel_lengths_in", shape=('batch_size', 1), dtype=np.int32) - mel_lengths_out = gs.Variable("mel_lengths_out", shape=('batch_size', 1), dtype=np.int32) - - - # Set loop body inputs in the correct order - loop_body.inputs = [iteration_num, cond_in, loop_tensors["decoder_input"], loop_tensors["attention_hidden"], loop_tensors["attention_cell"], loop_tensors["decoder_hidden"], loop_tensors["decoder_cell"], loop_tensors["attention_weights"], loop_tensors["attention_weights_cum"], loop_tensors["attention_context"], not_finished_in, mel_lengths_in] - - # Set loop body outputs in the correct order - loop_body.outputs = [cond_out, loop_tensors["decoder_output"], loop_tensors["out_attention_hidden"], loop_tensors["out_attention_cell"], 
loop_tensors["out_decoder_hidden"], loop_tensors["out_decoder_cell"], loop_tensors["out_attention_weights"], loop_tensors["out_attention_weights_cum"], loop_tensors["out_attention_context"], not_finished_out, mel_lengths_out, loop_tensors["decoder_output"], loop_tensors["gate_prediction"], loop_tensors["out_attention_weights"]] - - # The loop stop condition is given by the following lines in PyTorch - # dec = torch.le(torch.sigmoid(decoder_outputs[8]), gate_threshold).to(torch.int32).squeeze(1) - # not_finished = not_finished*dec - # if torch.sum(not_finished) == 0: - # break - - # To compute cond_out, we can essentially follow the same steps. Using Less instead of Greater+Not for now - - gate_threshold = gs.Constant("gate_threshold", np.array([0.5], dtype=float_prec)) - gate_sigmoid = gs.Variable("gate_sigmoid", dtype=float_prec, shape=()) - sigmoid = loop_body.nodes.append(gs.Node(op="Sigmoid", inputs=[loop_tensors["gate_prediction"]], outputs=[gate_sigmoid])) - - leq_output = gs.Variable("leq_output", dtype=bool) - leq = loop_body.nodes.append(gs.Node(op="Less", inputs=[gate_sigmoid, gate_threshold], outputs=[leq_output])) - - loop_body.nodes.append(gs.Node(op="And", inputs=[not_finished_in, leq_output], outputs=[not_finished_out])) - - cast_output = gs.Variable("cast_output", dtype=np.int32) - loop_body.nodes.append(gs.Node(op="Cast", inputs=[not_finished_out], outputs=[cast_output], attrs={"to": 6})) # int32 - - reduce_output = gs.Variable("reduce_output", dtype=np.int32) - loop_body.nodes.append( gs.Node(op="ReduceSum", inputs=[cast_output], outputs=[reduce_output], attrs={"axes": [0], "keepdims": 0})) - - unsqueezed_cond_out = gs.Variable("unsqueezed_cond_out", dtype=bool) - loop_body.nodes.append(gs.Node(op="Equal", inputs=[reduce_output, gs.Constant("zero", np.array(0, dtype=np.int32))], outputs=[unsqueezed_cond_out])) - - squeezed_cond_out = gs.Variable("squeezed_cond_out", dtype=bool) - loop_body.nodes.append(gs.Node(op="Squeeze", 
inputs=[unsqueezed_cond_out], outputs=[squeezed_cond_out], attrs={"axes": [0]})) - - loop_body.nodes.append(gs.Node(op="Not", inputs=[squeezed_cond_out], outputs=[cond_out])) - - # Compute mel_lengths - # from PyTorch: mel_lengths += not_finished - - loop_body.nodes.append(gs.Node(op="Add", inputs=[mel_lengths_in, cast_output], outputs=[mel_lengths_out])) - - memory = gs.Variable("memory", dtype=float_prec, shape=('batch_size', 'seq_len', 512)) - processed_memory = gs.Variable("processed_memory", dtype=float_prec, shape=('batch_size', 'seq_len', 128)) - mask = gs.Variable("mask", dtype=bool, shape=('batch_size', 'seq_len')) - - loop_body.toposort() - onnx.save(gs.export_onnx(loop_body), os.path.join(output_dir, "loop_body_{prec}.onnx".format(prec="fp16" if float_prec == np.float16 else "fp32"))) - - # Create outer graph - - # Inputs to outer graph are the following (suffixed with _0 to signify initial states) - # - decoder_input_0 - # - attention_hidden_0 - # - attention_cell_0 - # - decoder_hidden_0 - # - decoder_cell_0 - # - attention_weights_0 - # - attention_weights_cum_0 - # - attention_context_0 - # - memory - # - processed_memory - # - mask - - # Outputs are the following - # - mel_outputs - # - mel_lengths - - # Note: alignments and gate_outputs are scan outputs, but don't seem to be used later in the PyTorch implementation. 
For now, we will make them intermediate tensors that are not outputted - - graph = gs.Graph() - - decoder_input_0 = gs.Variable("decoder_input_0", dtype=float_prec, shape=('batch_size', 80)) - attention_hidden_0 = gs.Variable("attention_hidden_0", dtype=float_prec, shape=('batch_size', 1024)) - attention_cell_0 = gs.Variable("attention_cell_0", dtype=float_prec, shape=('batch_size', 1024)) - decoder_hidden_0 = gs.Variable("decoder_hidden_0", dtype=float_prec, shape=('batch_size', 1024)) - decoder_cell_0 = gs.Variable("decoder_cell_0", dtype=float_prec, shape=('batch_size', 1024)) - attention_weights_0 = gs.Variable("attention_weights_0", dtype=float_prec, shape=('batch_size', 'seq_len')) - attention_weights_cum_0 = gs.Variable("attention_weights_cum_0", dtype=float_prec, shape=('batch_size', 'seq_len')) - attention_context_0 = gs.Variable("attention_context_0", dtype=float_prec, shape=('batch_size', 512)) - not_finished_0 = gs.Variable("not_finished_0", dtype=bool) - mel_lengths_0 = gs.Variable("mel_lengths_0", dtype=np.int32) - - # For not_finished, we need to generate a tensor of shape (batch_size) that is all 1s - # We can use the ONNX ConstantOfShape op to do this - not_finished_shape = gs.Variable("not_finished_shape", dtype=np.int64) - reduced = gs.Variable("reduced", dtype=float_prec) - graph.nodes.append(gs.Node(op="ReduceSum", inputs=[decoder_input_0], outputs=[reduced], attrs={"axes":[1], "keepdims": 1})) - graph.nodes.append(gs.Node(op="Shape", inputs=[reduced], outputs=[not_finished_shape])) - before_cast = gs.Variable("before_cast", dtype=np.int32) - graph.nodes.append(gs.Node(op="ConstantOfShape", inputs=[not_finished_shape], outputs=[before_cast], attrs={"value":gs.Constant("one", np.array([1], dtype=np.int32))})) - graph.nodes.append(gs.Node(op="Cast", inputs=[before_cast], outputs=[not_finished_0], attrs={"to": 9})) - - # Same thing for mel_lengths, but we need all 0s - graph.nodes.append(gs.Node(op="ConstantOfShape", inputs=[not_finished_shape], 
outputs=[mel_lengths_0], attrs={"value":gs.Constant("zero", np.array([0], dtype=np.int32))})) - - # Loop carried dependecies at the end of the loop - decoder_input_t = gs.Variable("decoder_input_t", dtype=float_prec, shape=('batch_size', 80)) - attention_hidden_t = gs.Variable("attention_hidden_t", dtype=float_prec, shape=('batch_size', 1024)) - attention_cell_t = gs.Variable("attention_cell_t", dtype=float_prec, shape=('batch_size', 1024)) - decoder_hidden_t = gs.Variable("decoder_hidden_t", dtype=float_prec, shape=('batch_size', 1024)) - decoder_cell_t = gs.Variable("decoder_cell_t", dtype=float_prec, shape=('batch_size', 1024)) - attention_weights_t = gs.Variable("attention_weights_t", dtype=float_prec, shape=('batch_size', 'seq_len')) - attention_weights_cum_t = gs.Variable("attention_weights_cum_t", dtype=float_prec, shape=('batch_size', 'seq_len')) - attention_context_t = gs.Variable("attention_context_t", dtype=float_prec, shape=('batch_size', 512)) - not_finished_t = gs.Variable("not_finished_t", dtype=bool) - mel_lengths_t = gs.Variable("mel_lengths_t", dtype=np.int32, shape=('batch_size', 1)) - - # Scan outputs - mel_outputs_raw = gs.Variable("mel_outputs_raw", dtype=float_prec, shape=(-1, 'batch_size', 80)) - gate_outputs = gs.Variable("gate_outputs", dtype=float_prec, shape=(-1, 'batch_size', 1)) - alignments = gs.Variable("alignments", dtype=float_prec, shape=(-1, 1, 'seq_len')) - - mel_outputs = gs.Variable("mel_outputs", dtype=float_prec, shape=('batch_size', 80, -1)) - - graph.inputs = [decoder_input_0, attention_hidden_0, attention_cell_0, decoder_hidden_0, decoder_cell_0, attention_weights_0, attention_weights_cum_0, attention_context_0, memory, processed_memory, mask] - graph.outputs = [mel_outputs, mel_lengths_t] - - trip_count = gs.Constant("trip_count", np.array(0, dtype=np.int64)) # In ONNX, this is an optional parameter, but I don't think ONNX-GS supports optional inputs. 
To fix this, after we export the ONNX ModelProto from GS, we replace this input with "" - initial_cond = gs.Constant("initial_cond", np.array(True, dtype=bool)) - loop_inputs = [trip_count, initial_cond, decoder_input_0, attention_hidden_0, attention_cell_0, decoder_hidden_0, decoder_cell_0, attention_weights_0, attention_weights_cum_0, attention_context_0, not_finished_0, mel_lengths_0] - loop_outputs = [decoder_input_t, attention_hidden_t, attention_cell_t, decoder_hidden_t, decoder_cell_t, attention_weights_t, attention_weights_cum_t, attention_context_t, not_finished_t, mel_lengths_t, mel_outputs_raw, gate_outputs, alignments] - decoder_loop = gs.Node(op="Loop", name="decoder_loop", inputs=loop_inputs, outputs=loop_outputs, attrs={"body": loop_body}) - graph.nodes.append(decoder_loop) - - graph.nodes.append(gs.Node(op="Transpose", inputs=[mel_outputs_raw], outputs=[mel_outputs], attrs={"perm": [1, 2, 0]})) # Output needs to have loop dimension as inner-most dim - - graph.toposort() - exported_graph = gs.export_onnx(graph) - [x for x in exported_graph.graph.node if x.name == "decoder_loop"][0].input[0] = "" # Remove trip count input - - onnx.save(exported_graph, os.path.join(output_dir, decoder_out_name)) - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('model_path', type=str, - help='path to original decoder_iter ONNX model') - parser.add_argument('-o', '--output_dir', type=str, default='.', help='Output directory') - parser.add_argument('--decoder_out', type=str, help='Filename of the exported decoder with outer loop') - parser.add_argument('--fp16', action='store_true') - - args = parser.parse_args() - - if args.decoder_out == None: - args.decoder_out = "decoder_with_outer_loop_{}.onnx".format("fp16" if args.fp16 else "fp32") - - insert_decoder_loop(args.model_path, args.output_dir, args.decoder_out, args.fp16) diff --git a/demo/Tacotron2/tensorrt/inference_trt.py b/demo/Tacotron2/tensorrt/inference_trt.py deleted file 
mode 100644 index d1a6dabd..00000000 --- a/demo/Tacotron2/tensorrt/inference_trt.py +++ /dev/null @@ -1,491 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import tensorrt as trt -import numpy as np -from scipy.io.wavfile import write -import time -import torch -import argparse -import os.path as path - -import sys -from pathlib import Path -sys.path.append(str(Path(__file__).parents[1])) - -from common.utils import to_gpu, get_mask_from_lengths -from tacotron2.text import text_to_sequence -from inference import MeasureTime, prepare_input_sequence, load_and_setup_model -import dllogger as DLLogger -from dllogger import StdOutBackend, JSONStreamBackend, Verbosity -from trt_utils import load_engine, run_trt_engine - -from waveglow.denoiser import Denoiser - -def parse_args(parser): - """ - Parse commandline arguments. 
- """ - parser.add_argument('-i', '--input', type=str, required=True, - help='full path to the input text (phareses separated by new line)') - parser.add_argument('-o', '--output', required=True, - help='output folder to save audio (file per phrase)') - parser.add_argument('--encoder', type=str, required=True, - help='full path to the Encoder engine') - parser.add_argument('--decoder', type=str, required=True, - help='full path to the DecoderIter engine') - parser.add_argument('--postnet', type=str, required=True, - help='full path to the Postnet engine') - parser.add_argument('--waveglow', type=str, required=True, - help='full path to the WaveGlow engine') - parser.add_argument('--waveglow-ckpt', type=str, default="", - help='full path to the WaveGlow model checkpoint file') - parser.add_argument('--log-file', type=str, default='nvlog.json', - help='Filename for logging') - parser.add_argument('-d', '--denoising-strength', default=0.01, type=float) - parser.add_argument('-sr', '--sampling-rate', default=22050, type=int, - help='Sampling rate') - parser.add_argument('--stft-hop-length', type=int, default=256, - help='STFT hop length for estimating audio length from mel size') - parser.add_argument('--fp16', action='store_true', - help='inference with FP16') - parser.add_argument('--loop', dest='loop', action='store_true', - help='Includes the outer decoder loop in the ONNX model. Enabled by default and only supported on TensorRT 8.0 or later.') - parser.add_argument('--no-loop', dest='loop', action='store_false', - help='Excludes outer decoder loop from decoder ONNX model. 
Default behavior and necessary for TensorRT 7.2 or earlier.') - parser.set_defaults(loop=int(trt.__version__[0]) >= 8) - parser.add_argument('--waveglow-onnxruntime', action='store_true', - help='Specify this option to use ONNX runtime instead of TRT for running Waveglow') - parser.add_argument('--decoder-onnxruntime', action='store_true', - help='Specify this option to use ONNX runtime instead of TRT for running the TT2 Decoder with loop. When using this option, pass the decoder ONNX model to the --decoder argument') - return parser - - -def init_decoder_inputs(memory, processed_memory, memory_lengths): - - device = memory.device - dtype = memory.dtype - bs = memory.size(0) - seq_len = memory.size(1) - attention_rnn_dim = 1024 - decoder_rnn_dim = 1024 - encoder_embedding_dim = 512 - n_mel_channels = 80 - - attention_hidden = torch.zeros(bs, attention_rnn_dim, device=device, dtype=dtype) - attention_cell = torch.zeros(bs, attention_rnn_dim, device=device, dtype=dtype) - decoder_hidden = torch.zeros(bs, decoder_rnn_dim, device=device, dtype=dtype) - decoder_cell = torch.zeros(bs, decoder_rnn_dim, device=device, dtype=dtype) - attention_weights = torch.zeros(bs, seq_len, device=device, dtype=dtype) - attention_weights_cum = torch.zeros(bs, seq_len, device=device, dtype=dtype) - attention_context = torch.zeros(bs, encoder_embedding_dim, device=device, dtype=dtype) - mask = get_mask_from_lengths(memory_lengths).to(device) - decoder_input = torch.zeros(bs, n_mel_channels, device=device, dtype=dtype) - - return (decoder_input, attention_hidden, attention_cell, decoder_hidden, - decoder_cell, attention_weights, attention_weights_cum, - attention_context, memory, processed_memory, mask) - -def init_decoder_outputs(memory, memory_lengths): - - device = memory.device - dtype = memory.dtype - bs = memory.size(0) - seq_len = memory.size(1) - attention_rnn_dim = 1024 - decoder_rnn_dim = 1024 - encoder_embedding_dim = 512 - n_mel_channels = 80 - - attention_hidden = 
torch.zeros(bs, attention_rnn_dim, device=device, dtype=dtype) - attention_cell = torch.zeros(bs, attention_rnn_dim, device=device, dtype=dtype) - decoder_hidden = torch.zeros(bs, decoder_rnn_dim, device=device, dtype=dtype) - decoder_cell = torch.zeros(bs, decoder_rnn_dim, device=device, dtype=dtype) - attention_weights = torch.zeros(bs, seq_len, device=device, dtype=dtype) - attention_weights_cum = torch.zeros(bs, seq_len, device=device, dtype=dtype) - attention_context = torch.zeros(bs, encoder_embedding_dim, device=device, dtype=dtype) - decoder_output = torch.zeros(bs, n_mel_channels, device=device, dtype=dtype) - gate_prediction = torch.zeros(bs, 1, device=device, dtype=dtype) - - return (attention_hidden, attention_cell, decoder_hidden, - decoder_cell, attention_weights, attention_weights_cum, - attention_context, decoder_output, gate_prediction) - -def init_decoder_tensors(decoder_inputs, decoder_outputs): - - decoder_tensors = { - "inputs" : { - 'decoder_input': decoder_inputs[0], - 'attention_hidden': decoder_inputs[1], - 'attention_cell': decoder_inputs[2], - 'decoder_hidden': decoder_inputs[3], - 'decoder_cell': decoder_inputs[4], - 'attention_weights': decoder_inputs[5], - 'attention_weights_cum': decoder_inputs[6], - 'attention_context': decoder_inputs[7], - 'memory': decoder_inputs[8], - 'processed_memory': decoder_inputs[9], - 'mask': decoder_inputs[10] - }, - "outputs" : { - 'out_attention_hidden': decoder_outputs[0], - 'out_attention_cell': decoder_outputs[1], - 'out_decoder_hidden': decoder_outputs[2], - 'out_decoder_cell': decoder_outputs[3], - 'out_attention_weights': decoder_outputs[4], - 'out_attention_weights_cum': decoder_outputs[5], - 'out_attention_context': decoder_outputs[6], - 'decoder_output': decoder_outputs[7], - 'gate_prediction': decoder_outputs[8] - } - } - return decoder_tensors - -def swap_inputs_outputs(decoder_inputs, decoder_outputs): - - new_decoder_inputs = (decoder_outputs[7], # decoder_output - decoder_outputs[0], # 
attention_hidden - decoder_outputs[1], # attention_cell - decoder_outputs[2], # decoder_hidden - decoder_outputs[3], # decoder_cell - decoder_outputs[4], # attention_weights - decoder_outputs[5], # attention_weights_cum - decoder_outputs[6], # attention_context - decoder_inputs[8], # memory - decoder_inputs[9], # processed_memory - decoder_inputs[10]) # mask - - new_decoder_outputs = (decoder_inputs[1], # attention_hidden - decoder_inputs[2], # attention_cell - decoder_inputs[3], # decoder_hidden - decoder_inputs[4], # decoder_cell - decoder_inputs[5], # attention_weights - decoder_inputs[6], # attention_weights_cum - decoder_inputs[7], # attention_context - decoder_inputs[0], # decoder_input - decoder_outputs[8])# gate_output - - return new_decoder_inputs, new_decoder_outputs - - -def infer_tacotron2_trt(encoder, decoder_iter, postnet, - encoder_context, decoder_context, postnet_context, - sequences, sequence_lengths, measurements, fp16, loop): - - batch_size = len(sequence_lengths) - max_sequence_len = sequence_lengths[0] - memory = torch.zeros((batch_size, max_sequence_len, 512)).cuda() - if fp16: - memory = memory.half() - device = memory.device - dtype = memory.dtype - - processed_memory = torch.zeros((batch_size, max_sequence_len, 128), device=device, dtype=dtype) - lens = torch.zeros_like(sequence_lengths) - print(f"batch_size: {batch_size}, max sequence length: {max_sequence_len}") - - encoder_tensors = { - "inputs" : - {'sequences': sequences, 'sequence_lengths': sequence_lengths}, - "outputs" : - {'memory': memory, 'lens': lens, 'processed_memory': processed_memory} - } - - print("Running Tacotron2 Encoder") - with MeasureTime(measurements, "tacotron2_encoder_time"): - run_trt_engine(encoder_context, encoder, encoder_tensors) - max_decoder_steps = 1024 - device = memory.device - mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32, device = device) - not_finished = torch.ones([memory.size(0)], dtype=torch.int32, device = device) - mel_outputs = 
torch.ones((batch_size, 80, max_decoder_steps), device = device, dtype=dtype).cuda() - gate_threshold = 0.5 - first_iter = True - - decoder_inputs = init_decoder_inputs(memory, processed_memory, sequence_lengths) - decoder_outputs = init_decoder_outputs(memory, sequence_lengths) - - if loop: - if decoder_context is None: - print("Running Tacotron2 Decoder with loop with ONNX-RT") - decoder_inputs_onnxrt = [x.cpu().numpy().copy() for x in decoder_inputs] - import onnx - import onnxruntime - sess = onnxruntime.InferenceSession(decoder_iter) - - with MeasureTime(measurements, "tacotron2_decoder_time"): - result = sess.run(["mel_outputs", "mel_lengths_t"], { - 'decoder_input_0': decoder_inputs_onnxrt[0], - 'attention_hidden_0': decoder_inputs_onnxrt[1], - 'attention_cell_0': decoder_inputs_onnxrt[2], - 'decoder_hidden_0': decoder_inputs_onnxrt[3], - 'decoder_cell_0': decoder_inputs_onnxrt[4], - 'attention_weights_0': decoder_inputs_onnxrt[5], - 'attention_weights_cum_0': decoder_inputs_onnxrt[6], - 'attention_context_0': decoder_inputs_onnxrt[7], - 'memory': decoder_inputs_onnxrt[8], - 'processed_memory': decoder_inputs_onnxrt[9], - 'mask': decoder_inputs_onnxrt[10] - }) - - mel_outputs = torch.tensor(result[0], device=device) - mel_lengths = torch.tensor(result[1], device=device) - else: - print("Running Tacotron2 Decoder with loop") - decoder_tensors = { - "inputs" : - { - 'decoder_input_0': decoder_inputs[0], - 'attention_hidden_0': decoder_inputs[1], - 'attention_cell_0': decoder_inputs[2], - 'decoder_hidden_0': decoder_inputs[3], - 'decoder_cell_0': decoder_inputs[4], - 'attention_weights_0': decoder_inputs[5], - 'attention_weights_cum_0': decoder_inputs[6], - 'attention_context_0': decoder_inputs[7], - 'memory': decoder_inputs[8], - 'processed_memory': decoder_inputs[9], - 'mask': decoder_inputs[10] - }, - "outputs" : - {'mel_outputs': mel_outputs, 'mel_lengths_t': mel_lengths} - } - - with MeasureTime(measurements, "tacotron2_decoder_time"): - 
run_trt_engine(decoder_context, decoder_iter, decoder_tensors) - mel_outputs = mel_outputs[:,:,:torch.max(mel_lengths)] - - else: - print("Running Tacotron2 Decoder") - measurements_decoder = {} - while True: - decoder_tensors = init_decoder_tensors(decoder_inputs, decoder_outputs) - with MeasureTime(measurements_decoder, "step"): - run_trt_engine(decoder_context, decoder_iter, decoder_tensors) - - if first_iter: - mel_outputs = torch.unsqueeze(decoder_outputs[7], 2) - gate_outputs = torch.unsqueeze(decoder_outputs[8], 2) - alignments = torch.unsqueeze(decoder_outputs[4], 2) - measurements['tacotron2_decoder_time'] = measurements_decoder['step'] - first_iter = False - else: - mel_outputs = torch.cat((mel_outputs, torch.unsqueeze(decoder_outputs[7], 2)), 2) - gate_outputs = torch.cat((gate_outputs, torch.unsqueeze(decoder_outputs[8], 2)), 2) - alignments = torch.cat((alignments, torch.unsqueeze(decoder_outputs[4], 2)), 2) - measurements['tacotron2_decoder_time'] += measurements_decoder['step'] - - dec = torch.le(torch.sigmoid(decoder_outputs[8]), gate_threshold).to(torch.int32).squeeze(1) - not_finished = not_finished*dec - mel_lengths += not_finished - - if torch.sum(not_finished) == 0: - print("Stopping after",mel_outputs.size(2),"decoder steps") - break - if mel_outputs.size(2) == max_decoder_steps: - print("Warning! 
Reached max decoder steps") - break - - decoder_inputs, decoder_outputs = swap_inputs_outputs(decoder_inputs, decoder_outputs) - - mel_outputs = mel_outputs.clone().detach() - mel_outputs_postnet = torch.zeros_like(mel_outputs, device=device, dtype=dtype) - - postnet_tensors = { - "inputs" : - {'mel_outputs': mel_outputs}, - "outputs" : - {'mel_outputs_postnet': mel_outputs_postnet} - } - print("Running Tacotron2 Postnet") - with MeasureTime(measurements, "tacotron2_postnet_time"): - run_trt_engine(postnet_context, postnet, postnet_tensors) - - print("Tacotron2 Postnet done") - - return mel_outputs_postnet, mel_lengths - - -def infer_waveglow_trt(waveglow, waveglow_context, mel, measurements, fp16): - - mel_size = mel.size(2) - batch_size = mel.size(0) - stride = 256 - n_group = 8 - z_size = mel_size*stride - z_size = z_size//n_group - z = torch.randn(batch_size, n_group, z_size).cuda() - audios = torch.zeros(batch_size, mel_size*stride).cuda() - - mel = mel.unsqueeze(3) - z = z.unsqueeze(3) - - if fp16: - z = z.half() - mel = mel.half() - audios = audios.half() - - waveglow_tensors = { - "inputs" : {'mel': mel, 'z': z}, - "outputs" : {'audio': audios} - } - - print("Running WaveGlow with TensorRT") - with MeasureTime(measurements, "waveglow_time"): - run_trt_engine(waveglow_context, waveglow, waveglow_tensors) - - return audios - -def infer_waveglow_onnx(waveglow_path, mel, measurements, fp16): - import onnx - import onnxruntime - sess = onnxruntime.InferenceSession(waveglow_path) - - device=mel.device - mel_size = mel.size(2) - batch_size = mel.size(0) - stride = 256 - n_group = 8 - z_size = mel_size*stride - z_size = z_size//n_group - z = torch.randn(batch_size, n_group, z_size).cuda() - - mel = mel.unsqueeze(3) - z = z.unsqueeze(3) - - if fp16: - z = z.half() - mel = mel.half() - - mel = mel.cpu().numpy().copy() - z = z.cpu().numpy().copy() - - print("Running WaveGlow with ONNX Runtime") - with MeasureTime(measurements, "waveglow_time"): - result = 
sess.run(["audio"], { - 'mel': mel, - 'z': z - }) - audios = torch.tensor(result[0], device=device) - return audios - -def main(): - - parser = argparse.ArgumentParser( - description='TensorRT Tacotron 2 Inference') - parser = parse_args(parser) - args, _ = parser.parse_known_args() - - # initialize CUDA state - torch.cuda.init() - - TRT_LOGGER = trt.Logger(trt.Logger.WARNING) - encoder = load_engine(args.encoder, TRT_LOGGER) - postnet = load_engine(args.postnet, TRT_LOGGER) - - if args.waveglow_ckpt != "": - # setup denoiser using WaveGlow PyTorch checkpoint - waveglow_ckpt = load_and_setup_model('WaveGlow', parser, args.waveglow_ckpt, - True, forward_is_infer=True) - denoiser = Denoiser(waveglow_ckpt).cuda() - # after initialization, we don't need WaveGlow PyTorch checkpoint - # anymore - deleting - del waveglow_ckpt - torch.cuda.empty_cache() - - # create TRT contexts for each engine - encoder_context = encoder.create_execution_context() - decoder_context = None - if not args.decoder_onnxruntime: - decoder_iter = load_engine(args.decoder, TRT_LOGGER) - decoder_context = decoder_iter.create_execution_context() - else: - decoder_iter = args.decoder - postnet_context = postnet.create_execution_context() - - waveglow_context = None - if not args.waveglow_onnxruntime: - waveglow = load_engine(args.waveglow, TRT_LOGGER) - waveglow_context = waveglow.create_execution_context() - - DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, - path.join(args.output, args.log_file)), - StdOutBackend(Verbosity.VERBOSE)]) - - texts = [] - try: - f = open(args.input, 'r') - texts = f.readlines() - except: - print("Could not read file") - sys.exit(1) - - measurements = {} - - sequences, sequence_lengths = prepare_input_sequence(texts) - dt = encoder.get_tensor_dtype("sequences") - sequences = sequences.to(torch.int64 if dt == trt.DataType.INT64 else torch.int32) - sequence_lengths = sequence_lengths.to(torch.int32) - - with MeasureTime(measurements, "latency"): - mel, 
mel_lengths = infer_tacotron2_trt(encoder, decoder_iter, postnet, - encoder_context, decoder_context, postnet_context, - sequences, sequence_lengths, measurements, args.fp16, args.loop) - audios = infer_waveglow_onnx(args.waveglow, mel, measurements, args.fp16) if args.waveglow_onnxruntime else \ - infer_waveglow_trt(waveglow, waveglow_context, mel, measurements, args.fp16) - - with encoder_context, postnet_context: - pass - - if decoder_context is not None: - with decoder_context: pass - - if waveglow_context is not None: - with waveglow_context: pass - - audios = audios.float() - if args.waveglow_ckpt != "": - with MeasureTime(measurements, "denoiser"): - audios = denoiser(audios, strength=args.denoising_strength).squeeze(1) - - for i, audio in enumerate(audios): - audio = audio[:mel_lengths[i]*args.stft_hop_length] - audio = audio/torch.max(torch.abs(audio)) - audio_path = path.join(args.output, f"audio_{i}_trt.wav") - write(audio_path, args.sampling_rate, audio.cpu().numpy()) - - - DLLogger.log(step=0, data={"tacotron2_encoder_latency": measurements['tacotron2_encoder_time']}) - DLLogger.log(step=0, data={"tacotron2_decoder_latency": measurements['tacotron2_decoder_time']}) - DLLogger.log(step=0, data={"tacotron2_postnet_latency": measurements['tacotron2_postnet_time']}) - DLLogger.log(step=0, data={"waveglow_latency": measurements['waveglow_time']}) - DLLogger.log(step=0, data={"latency": measurements['latency']}) - - if args.waveglow_ckpt != "": - DLLogger.log(step=0, data={"denoiser": measurements['denoiser']}) - DLLogger.flush() - - prec = "fp16" if args.fp16 else "fp32" - latency = measurements['latency'] - throughput = audios.size(1)/latency - log_data = f"1,{sequence_lengths[0].item()},{prec},{latency},{throughput},{mel_lengths[0].item()}\n" - log_file = path.join(args.output, f"log_bs1_{prec}.log") - with open(log_file, 'a') as f: - f.write(log_data) - -if __name__ == "__main__": - main() diff --git a/demo/Tacotron2/tensorrt/run_latency_tests_trt.sh 
b/demo/Tacotron2/tensorrt/run_latency_tests_trt.sh deleted file mode 100644 index a289cf63..00000000 --- a/demo/Tacotron2/tensorrt/run_latency_tests_trt.sh +++ /dev/null @@ -1,17 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -bash test_infer.sh --test tensorrt/test_infer_trt.py -bs 1 -il 128 --fp16 --num-iters 1003 --encoder ./output/encoder_fp16.engine --decoder ./output/decoder_with_outer_loop_fp16.engine --postnet ./output/postnet_fp16.engine --waveglow ./output/waveglow_fp16.engine --wn-channels 256 diff --git a/demo/Tacotron2/tensorrt/test_infer_trt.py b/demo/Tacotron2/tensorrt/test_infer_trt.py deleted file mode 100644 index 7023f02f..00000000 --- a/demo/Tacotron2/tensorrt/test_infer_trt.py +++ /dev/null @@ -1,230 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -import sys -sys.path.append('./') -from tacotron2.text import text_to_sequence -import models -import tensorrt as trt -import torch -import argparse -import numpy as np -from scipy.io.wavfile import write - -from inference import checkpoint_from_distributed, unwrap_distributed, MeasureTime, prepare_input_sequence, load_and_setup_model -from inference_trt import infer_tacotron2_trt, infer_waveglow_trt - -from trt_utils import load_engine - -import time -import dllogger as DLLogger -from dllogger import StdOutBackend, JSONStreamBackend, Verbosity - -# from apex import amp - -def parse_args(parser): - """ - Parse commandline arguments. - """ - parser.add_argument('--encoder', type=str, required=True, - help='full path to the Encoder engine') - parser.add_argument('--decoder', type=str, required=True, - help='full path to the DecoderIter engine') - parser.add_argument('--postnet', type=str, required=True, - help='full path to the Postnet engine') - parser.add_argument('--waveglow', type=str, required=True, - help='full path to the WaveGlow engine') - parser.add_argument('--waveglow-ckpt', type=str, default="", - help='full path to the WaveGlow model checkpoint file') - parser.add_argument('-s', '--sigma-infer', default=0.6, type=float) - parser.add_argument('-sr', '--sampling-rate', default=22050, type=int, - help='Sampling rate') - parser.add_argument('--fp16', action='store_true', - help='inference with FP16') - parser.add_argument('--log-file', type=str, default='nvlog.json', - help='Filename for logging') - parser.add_argument('--stft-hop-length', type=int, default=256, - help='STFT hop length for estimating audio length from mel size') - parser.add_argument('--num-iters', type=int, default=10, - help='Number of iterations') - parser.add_argument('-il', '--input-length', type=int, default=64, - help='Input length') - parser.add_argument('-bs', 
'--batch-size', type=int, default=1, - help='Batch size') - - return parser - - -def print_stats(measurements_all): - - print(np.mean(measurements_all['latency'][1:]), - np.mean(measurements_all['throughput'][1:]), - np.mean(measurements_all['pre_processing'][1:]), - np.mean(measurements_all['type_conversion'][1:])+ - np.mean(measurements_all['storage'][1:])+ - np.mean(measurements_all['data_transfer'][1:]), - np.mean(measurements_all['num_mels_per_audio'][1:])) - - throughput = measurements_all['throughput'] - preprocessing = measurements_all['pre_processing'] - type_conversion = measurements_all['type_conversion'] - storage = measurements_all['storage'] - data_transfer = measurements_all['data_transfer'] - postprocessing = [sum(p) for p in zip(type_conversion,storage,data_transfer)] - latency = measurements_all['latency'] - num_mels_per_audio = measurements_all['num_mels_per_audio'] - - latency.sort() - - cf_50 = max(latency[:int(len(latency)*0.50)]) - cf_90 = max(latency[:int(len(latency)*0.90)]) - cf_95 = max(latency[:int(len(latency)*0.95)]) - cf_99 = max(latency[:int(len(latency)*0.99)]) - cf_100 = max(latency[:int(len(latency)*1.0)]) - - print("Throughput average (samples/sec) = {:.4f}".format(np.mean(throughput))) - print("Preprocessing average (seconds) = {:.4f}".format(np.mean(preprocessing))) - print("Postprocessing average (seconds) = {:.4f}".format(np.mean(postprocessing))) - print("Number of mels per audio average = {}".format(np.mean(num_mels_per_audio))) # - print("Latency average (seconds) = {:.4f}".format(np.mean(latency))) - print("Latency std (seconds) = {:.4f}".format(np.std(latency))) - print("Latency cl 50 (seconds) = {:.4f}".format(cf_50)) - print("Latency cl 90 (seconds) = {:.4f}".format(cf_90)) - print("Latency cl 95 (seconds) = {:.4f}".format(cf_95)) - print("Latency cl 99 (seconds) = {:.4f}".format(cf_99)) - print("Latency cl 100 (seconds) = {:.4f}".format(cf_100)) - - -def main(): - """ - Launches text to speech (inference). 
- Inference is executed on a single GPU. - """ - parser = argparse.ArgumentParser( - description='PyTorch Tacotron 2 Inference') - parser = parse_args(parser) - args, unknown_args = parser.parse_known_args() - - DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, args.log_file), - StdOutBackend(Verbosity.VERBOSE)]) - for k,v in vars(args).items(): - DLLogger.log(step="PARAMETER", data={k:v}) - DLLogger.log(step="PARAMETER", data={'model_name':'Tacotron2_PyT'}) - - measurements_all = {"pre_processing": [], - "tacotron2_encoder_time": [], - "tacotron2_decoder_time": [], - "tacotron2_postnet_time": [], - "tacotron2_latency": [], - "waveglow_latency": [], - "latency": [], - "type_conversion": [], - "data_transfer": [], - "storage": [], - "tacotron2_items_per_sec": [], - "waveglow_items_per_sec": [], - "num_mels_per_audio": [], - "throughput": []} - - print("args:", args, unknown_args) - - torch.cuda.init() - - TRT_LOGGER = trt.Logger(trt.Logger.WARNING) - encoder = load_engine(args.encoder, TRT_LOGGER) - decoder_iter = load_engine(args.decoder, TRT_LOGGER) - postnet = load_engine(args.postnet, TRT_LOGGER) - waveglow = load_engine(args.waveglow, TRT_LOGGER) - - if args.waveglow_ckpt != "": - # setup denoiser using WaveGlow PyTorch checkpoint - waveglow_ckpt = load_and_setup_model('WaveGlow', parser, - args.waveglow_ckpt, - fp16_run=args.fp16, - cpu_run=False, - forward_is_infer=True) - denoiser = Denoiser(waveglow_ckpt).cuda() - # after initialization, we don't need WaveGlow PyTorch checkpoint - # anymore - deleting - del waveglow_ckpt - torch.cuda.empty_cache() - - # create TRT contexts for each engine - encoder_context = encoder.create_execution_context() - decoder_context = decoder_iter.create_execution_context() - postnet_context = postnet.create_execution_context() - waveglow_context = waveglow.create_execution_context() - - - texts = ["The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help 
to the shapeliness of the letters themselves. The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves."] - texts = [texts[0][:args.input_length]] - texts = texts*args.batch_size - - warmup_iters = 3 - - for iter in range(args.num_iters): - - measurements = {} - - with MeasureTime(measurements, "pre_processing"): - sequences_padded, input_lengths = prepare_input_sequence(texts) - sequences_padded = sequences_padded.to(torch.int32) - input_lengths = input_lengths.to(torch.int32) - - with torch.no_grad(): - with MeasureTime(measurements, "latency"): - with MeasureTime(measurements, "tacotron2_latency"): - mel, mel_lengths = infer_tacotron2_trt(encoder, decoder_iter, postnet, - encoder_context, decoder_context, postnet_context, - sequences_padded, input_lengths, measurements, args.fp16, True) - - with MeasureTime(measurements, "waveglow_latency"): - audios = infer_waveglow_trt(waveglow, waveglow_context, mel, measurements, args.fp16) - - num_mels = mel.size(0)*mel.size(2) - num_samples = audios.size(0)*audios.size(1) - - with MeasureTime(measurements, "type_conversion"): - audios = audios.float() - - with MeasureTime(measurements, "data_transfer"): - audios = audios.cpu() - - with MeasureTime(measurements, "storage"): - audios = audios.numpy() - for i, audio in enumerate(audios): - audio_path = "audio_"+str(i)+".wav" - write(audio_path, args.sampling_rate, - audio[:mel_lengths[i]*args.stft_hop_length]) - - measurements['tacotron2_items_per_sec'] = num_mels/measurements['tacotron2_latency'] - measurements['waveglow_items_per_sec'] = num_samples/measurements['waveglow_latency'] - measurements['num_mels_per_audio'] = mel.size(2) - measurements['throughput'] = num_samples/measurements['latency'] - - if iter >= warmup_iters: - for k,v in measurements.items(): - if k in measurements_all.keys(): - measurements_all[k].append(v) - DLLogger.log(step=(iter-warmup_iters), 
data={k: v}) - - DLLogger.flush() - - print_stats(measurements_all) - -if __name__ == '__main__': - main() diff --git a/demo/Tacotron2/tensorrt/trt_utils.py b/demo/Tacotron2/tensorrt/trt_utils.py deleted file mode 100644 index e150983f..00000000 --- a/demo/Tacotron2/tensorrt/trt_utils.py +++ /dev/null @@ -1,154 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import os -import sys -import tensorrt as trt - -# For a single dimension this will return the min, opt, and max size when given -# input of either one or three (comma delimited) values -# dim="1" or dim=1 returns (1, 1, 1) -# dim="1,4,5" returns (1, 4, 5) -def parse_dynamic_size(dim): - split = str(dim).split(',') - assert len(split) in (1,3) , "Dynamic size input must be either 1 or 3 comma-separated integers" - ints = [int(i) for i in split] - - if len(ints) == 1: - ints *= 3 - - assert ints[0] <= ints[1] <= ints[2] - return tuple(ints) - - -def is_dimension_dynamic(dim): - return dim is None or dim <= 0 - - -def is_shape_dynamic(shape): - return any([is_dimension_dynamic(dim) for dim in shape]) - - -def run_trt_engine(context, engine, tensors): - - bindings = [0] * engine.num_io_tensors - - for i in range(engine.num_io_tensors): - tensor_name = engine.get_tensor_name(i) - if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: - tensor = tensors['inputs'][tensor_name] - bindings[i] = tensor.data_ptr() - if is_shape_dynamic(engine.get_tensor_shape(tensor_name)): - context.set_input_shape(tensor_name, tensor.shape) - elif engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: - tensor = tensors['outputs'][tensor_name] - bindings[i] = tensor.data_ptr() - - context.execute_v2(bindings=bindings) - - -def load_engine(engine_filepath, trt_logger): - with open(engine_filepath, "rb") as f, trt.Runtime(trt_logger) as runtime: - engine = runtime.deserialize_cuda_engine(f.read()) - return engine - - -def engine_info(engine_filepath): - - TRT_LOGGER = trt.Logger(trt.Logger.WARNING) - engine = load_engine(engine_filepath, TRT_LOGGER) - - binding_template = r""" -{btype} {{ - name: "{bname}" - data_type: {dtype} - dims: {dims} -}}""" - type_mapping = {"DataType.HALF": "TYPE_FP16", - "DataType.FLOAT": "TYPE_FP32", - "DataType.INT32": "TYPE_INT32", - "DataType.BOOL" : "TYPE_BOOL"} - - print("engine name", engine.name) - start_dim = 1 - 
print("num_optimization_profiles", engine.num_optimization_profiles) - print("device_memory_size:", engine.device_memory_size) - print("max_workspace_size:", engine.get_memory_pool_limit(trt.MemoryPoolType.WORKSPACE)) - print("num_layers:", engine.num_layers) - - for i in range(engine.num_io_tensors): - tensor_name = engine.get_tensor_name(i) - btype = "input" if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT else "output" - dtype = engine.get_tensor_dtype(tensor_name) - bdims = engine.get_tensor_shape(tensor_name) - config_values = { - "btype": btype, - "bname": tensor_name, - "dtype": type_mapping[str(dtype)], - "dims": list(bdims[start_dim:]) - } - final_binding_str = binding_template.format_map(config_values) - print(final_binding_str) - - -def build_engine(model_file, shapes, max_ws=512*1024*1024, fp16=False, timing_cache=None): - - TRT_LOGGER = trt.Logger(trt.Logger.WARNING) - builder = trt.Builder(TRT_LOGGER) - - config = builder.create_builder_config() - config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, max_ws) - if fp16: - config.flags |= 1 << int(trt.BuilderFlag.FP16) - profile = builder.create_optimization_profile() - for s in shapes: - profile.set_shape(s['name'], min=s['min'], opt=s['opt'], max=s['max']) - config.add_optimization_profile(profile) - - timing_cache_available = int(trt.__version__[0]) >= 8 and timing_cache != None - # load global timing cache - if timing_cache_available: - if os.path.exists(timing_cache): - with open(timing_cache, "rb") as f: - cache = config.create_timing_cache(f.read()) - config.set_timing_cache(cache, ignore_mismatch = False) - else: - cache = config.create_timing_cache(b"") - config.set_timing_cache(cache, ignore_mismatch = False) - - network_creation_flag = 0 - if "EXPLICIT_BATCH" in trt.NetworkDefinitionCreationFlag.__members__.keys(): - network_creation_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(network_creation_flag) - - with 
trt.OnnxParser(network, TRT_LOGGER) as parser: - with open(model_file, 'rb') as model: - parsed = parser.parse(model.read()) - for i in range(parser.num_errors): - print("TensorRT ONNX parser error:", parser.get_error(i)) - engine = builder.build_serialized_network(network, config=config) - - # save global timing cache - if timing_cache_available: - cache = config.get_timing_cache() - with cache.serialize() as buffer: - with open(timing_cache, "wb") as f: - f.write(buffer) - f.flush() - os.fsync(f) - - return engine diff --git a/demo/Tacotron2/test_infer.py b/demo/Tacotron2/test_infer.py deleted file mode 100644 index 23816da9..00000000 --- a/demo/Tacotron2/test_infer.py +++ /dev/null @@ -1,198 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import torch -import argparse -import numpy as np -from scipy.io.wavfile import write - -from inference import MeasureTime, prepare_input_sequence, load_and_setup_model - -import dllogger as DLLogger -from dllogger import StdOutBackend, JSONStreamBackend, Verbosity - -from waveglow.denoiser import Denoiser - -def parse_args(parser): - """ - Parse commandline arguments. 
- """ - parser.add_argument('--tacotron2', type=str, - help='Full path to the Tacotron2 model checkpoint file') - parser.add_argument('--waveglow', type=str, - help='Full path to the WaveGlow model checkpoint file') - parser.add_argument('-s', '--sigma-infer', default=0.6, type=float, - help='Standard deviation of the Gaussian distribution') - parser.add_argument('-d', '--denoising-strength', default=0.01, type=float, - help='Denoising strength for removing model bias') - parser.add_argument('-sr', '--sampling-rate', default=22050, type=int, - help='Sampling rate') - - run_mode = parser.add_mutually_exclusive_group() - run_mode.add_argument('--fp16', action='store_true', - help='Run inference with FP16') - run_mode.add_argument('--cpu', action='store_true', - help='Run inference on CPU') - - parser.add_argument('--log-file', type=str, default='nvlog.json', - help='Filename for logging') - parser.add_argument('--stft-hop-length', type=int, default=256, - help='STFT hop length for estimating audio length from mel size') - parser.add_argument('--num-iters', type=int, default=10, - help='Number of iterations') - parser.add_argument('-il', '--input-length', type=int, default=64, - help='Input length') - parser.add_argument('-bs', '--batch-size', type=int, default=1, - help='Batch size') - - - return parser - - -def print_stats(measurements_all): - - throughput = measurements_all['throughput'] - preprocessing = measurements_all['pre_processing'] - type_conversion = measurements_all['type_conversion'] - storage = measurements_all['storage'] - data_transfer = measurements_all['data_transfer'] - postprocessing = [sum(p) for p in zip(type_conversion,storage,data_transfer)] - latency = measurements_all['latency'] - waveglow_latency = measurements_all['waveglow_latency'] - tacotron2_latency = measurements_all['tacotron2_latency'] - denoiser_latency = measurements_all['denoiser_latency'] - num_mels_per_audio = measurements_all['num_mels_per_audio'] - - latency.sort() - - cf_50 
= max(latency[:int(len(latency)*0.50)]) - cf_90 = max(latency[:int(len(latency)*0.90)]) - cf_95 = max(latency[:int(len(latency)*0.95)]) - cf_99 = max(latency[:int(len(latency)*0.99)]) - cf_100 = max(latency[:int(len(latency)*1.0)]) - - print("Throughput average (samples/sec) = {:.0f}".format(np.mean(throughput))) - print("Preprocessing average (seconds) = {:.4f}".format(np.mean(preprocessing))) - print("Postprocessing average (seconds) = {:.4f}".format(np.mean(postprocessing))) - print("Number of mels per audio average = {:.0f}".format(np.mean(num_mels_per_audio))) - print("Tacotron2 latency average (seconds) = {:.2f}".format(np.mean(tacotron2_latency))) - print("WaveGlow latency average (seconds) = {:.2f}".format(np.mean(waveglow_latency))) - print("Denoiser latency average (seconds) = {:.4f}".format(np.mean(denoiser_latency))) - print("Latency average (seconds) = {:.2f}".format(np.mean(latency))) - print("Latency std (seconds) = {:.2f}".format(np.std(latency))) - print("Latency cl 50 (seconds) = {:.2f}".format(cf_50)) - print("Latency cl 90 (seconds) = {:.2f}".format(cf_90)) - print("Latency cl 95 (seconds) = {:.2f}".format(cf_95)) - print("Latency cl 99 (seconds) = {:.2f}".format(cf_99)) - print("Latency cl 100 (seconds) = {:.2f}".format(cf_100)) - - -def main(): - """ - Launches text to speech (inference). - Inference is executed on a single GPU or CPU. 
- """ - parser = argparse.ArgumentParser( - description='PyTorch Tacotron 2 Inference') - parser = parse_args(parser) - args, unknown_args = parser.parse_known_args() - - DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, args.log_file), - StdOutBackend(Verbosity.VERBOSE)]) - for k,v in vars(args).items(): - DLLogger.log(step="PARAMETER", data={k:v}) - DLLogger.log(step="PARAMETER", data={'model_name':'Tacotron2_PyT'}) - - measurements_all = {"pre_processing": [], - "tacotron2_latency": [], - "waveglow_latency": [], - "denoiser_latency": [], - "latency": [], - "type_conversion": [], - "data_transfer": [], - "storage": [], - "tacotron2_items_per_sec": [], - "waveglow_items_per_sec": [], - "num_mels_per_audio": [], - "throughput": []} - - print("args:", args, unknown_args) - - tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2, - args.fp16, args.cpu, forward_is_infer=True) - waveglow = load_and_setup_model('WaveGlow', parser, args.waveglow, - args.fp16, args.cpu, forward_is_infer=True) - denoiser = Denoiser(waveglow) - if not args.cpu: - denoiser.cuda() - - texts = ["The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves. 
The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves."] - texts = [texts[0][:args.input_length]] - texts = texts*args.batch_size - - warmup_iters = 3 - - for iter in range(args.num_iters): - - measurements = {} - - with MeasureTime(measurements, "pre_processing", args.cpu): - sequences_padded, input_lengths = prepare_input_sequence(texts, args.cpu) - - with torch.no_grad(): - with MeasureTime(measurements, "latency", args.cpu): - with MeasureTime(measurements, "tacotron2_latency", args.cpu): - mel, mel_lengths, _ = tacotron2.infer(sequences_padded, input_lengths) - - with MeasureTime(measurements, "waveglow_latency", args.cpu): - audios = waveglow.infer(mel, sigma=args.sigma_infer) - - num_mels = mel.size(0)*mel.size(2) - num_samples = audios.size(0)*audios.size(1) - - with MeasureTime(measurements, "type_conversion", args.cpu): - audios = audios.float() - - with torch.no_grad(), MeasureTime(measurements, "denoiser_latency", args.cpu): - audios = denoiser(audios, strength=args.denoising_strength).squeeze(1) - - with MeasureTime(measurements, "data_transfer", args.cpu): - audios = audios.cpu() - - with MeasureTime(measurements, "storage", args.cpu): - audios = audios.numpy() - for i, audio in enumerate(audios): - audio_path = "audio_"+str(i)+".wav" - write(audio_path, args.sampling_rate, - audio[:mel_lengths[i]*args.stft_hop_length]) - - measurements['tacotron2_items_per_sec'] = num_mels/measurements['tacotron2_latency'] - measurements['waveglow_items_per_sec'] = num_samples/measurements['waveglow_latency'] - measurements['num_mels_per_audio'] = mel.size(2) - measurements['throughput'] = num_samples/measurements['latency'] - - if iter >= warmup_iters: - for k,v in measurements.items(): - measurements_all[k].append(v) - DLLogger.log(step=(iter-warmup_iters), data={k: v}) - - DLLogger.flush() - - print_stats(measurements_all) - -if __name__ == '__main__': - 
main() diff --git a/demo/Tacotron2/test_infer.sh b/demo/Tacotron2/test_infer.sh deleted file mode 100644 index 103fb941..00000000 --- a/demo/Tacotron2/test_infer.sh +++ /dev/null @@ -1,126 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -BATCH_SIZE=1 -INPUT_LENGTH=128 -NUM_ITERS=1003 # extra 3 iterations for warmup -TACOTRON2_CKPT="nvidia_tacotron2pyt_fp16_20190427" -WAVEGLOW_CKPT="nvidia_waveglow256pyt_fp16" -RUN_MODE="" # = fp32 -LOG_RUN_MODE="gpu_fp32" -TEST_PROGRAM="test_infer.py" -WN_CHANNELS=512 -LOG_SUFFIX_ADD="" #additional info, e.g., GPU type - -while [ -n "$1" ] -do - case "$1" in - -bs|--batch-size) - BATCH_SIZE="$2" - shift - ;; - -il|--input-length) - INPUT_LENGTH="$2" - shift - ;; - --num-iters) - NUM_ITERS="$2" - shift - ;; - --test) - TEST_PROGRAM="$2" - shift - ;; - --tacotron2) - TACOTRON2_CKPT="$2" - shift - ;; - --encoder) - ENCODER_CKPT="$2" - shift - ;; - --decoder) - DECODER_CKPT="$2" - shift - ;; - --postnet) - POSTNET_CKPT="$2" - shift - ;; - --waveglow) - WAVEGLOW_CKPT="$2" - shift - ;; - --wn-channels) - WN_CHANNELS="$2" - shift - ;; - --cpu) - RUN_MODE="--cpu" - LOG_RUN_MODE="cpu_fp32" - ;; - --fp16) - RUN_MODE="--fp16" - LOG_RUN_MODE="gpu_fp16" - ;; - --log-suffix) - LOG_SUFFIX_ADD="$2" - shift - ;; - *) - echo "Option $1 not recognized" - esac - shift -done - 
-LOG_SUFFIX=bs${BATCH_SIZE}_il${INPUT_LENGTH}_${LOG_RUN_MODE}_wn${WN_CHANNELS}_${LOG_SUFFIX_ADD} -NVLOG_FILE=nvlog_${LOG_SUFFIX}.json -TMP_LOGFILE=tmp_log_${LOG_SUFFIX}.log -LOGFILE=log_${LOG_SUFFIX}.log - - -if [ "$TEST_PROGRAM" = "tensorrt/test_infer_trt.py" ] -then - TACOTRON2_PARAMS="--encoder $ENCODER_CKPT --decoder $DECODER_CKPT --postnet $POSTNET_CKPT" -else - TACOTRON2_PARAMS="--tacotron2 $TACOTRON2_CKPT" -fi - -set -x -python3 $TEST_PROGRAM \ - $TACOTRON2_PARAMS \ - --waveglow $WAVEGLOW_CKPT \ - --batch-size $BATCH_SIZE \ - --input-length $INPUT_LENGTH \ - --log-file $NVLOG_FILE \ - --num-iters $NUM_ITERS \ - --wn-channels $WN_CHANNELS \ - $RUN_MODE \ - |& tee $TMP_LOGFILE -set +x - - -PERF=$(cat $TMP_LOGFILE | grep -F 'Throughput average (samples/sec)' | awk -F'= ' '{print $2}') -NUM_MELS=$(cat $TMP_LOGFILE | grep -F 'Number of mels per audio average' | awk -F'= ' '{print $2}') -LATENCY=$(cat $TMP_LOGFILE | grep -F 'Latency average (seconds)' | awk -F'= ' '{print $2}') -LATENCYSTD=$(cat $TMP_LOGFILE | grep -F 'Latency std (seconds)' | awk -F'= ' '{print $2}') -LATENCY50=$(cat $TMP_LOGFILE | grep -F 'Latency cl 50 (seconds)' | awk -F'= ' '{print $2}') -LATENCY90=$(cat $TMP_LOGFILE | grep -F 'Latency cl 90 (seconds)' | awk -F'= ' '{print $2}') -LATENCY95=$(cat $TMP_LOGFILE | grep -F 'Latency cl 95 (seconds)' | awk -F'= ' '{print $2}') -LATENCY99=$(cat $TMP_LOGFILE | grep -F 'Latency cl 99 (seconds)' | awk -F'= ' '{print $2}') - -echo "$BATCH_SIZE,$INPUT_LENGTH,$LOG_RUN_MODE,$NUM_ITERS,$LATENCY,$LATENCYSTD,$LATENCY50,$LATENCY90,$LATENCY95,$LATENCY99,$PERF,$NUM_MELS" | tee $LOGFILE diff --git a/demo/Tacotron2/train.py b/demo/Tacotron2/train.py deleted file mode 100644 index 55a9e56f..00000000 --- a/demo/Tacotron2/train.py +++ /dev/null @@ -1,535 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -import time -import argparse -import numpy as np -from contextlib import contextmanager - -import torch -from torch.utils.data import DataLoader -from torch.autograd import Variable -from torch.nn.parameter import Parameter - -import torch.distributed as dist -from torch.utils.data.distributed import DistributedSampler - -from apex.parallel import DistributedDataParallel as DDP - -import models -import loss_functions -import data_functions - -import dllogger as DLLogger -from dllogger import StdOutBackend, JSONStreamBackend, Verbosity - -from scipy.io.wavfile import write as write_wav - -from apex import amp -amp.lists.functional_overrides.FP32_FUNCS.remove('softmax') -amp.lists.functional_overrides.FP16_FUNCS.append('softmax') - - -def parse_args(parser): - """ - Parse commandline arguments. 
- """ - - parser.add_argument('-o', '--output', type=str, required=True, - help='Directory to save checkpoints') - parser.add_argument('-d', '--dataset-path', type=str, - default='./', help='Path to dataset') - parser.add_argument('-m', '--model-name', type=str, default='', required=True, - help='Model to train') - parser.add_argument('--log-file', type=str, default='nvlog.json', - help='Filename for logging') - parser.add_argument('--anneal-steps', nargs='*', - help='Epochs after which decrease learning rate') - parser.add_argument('--anneal-factor', type=float, choices=[0.1, 0.3], default=0.1, - help='Factor for annealing learning rate') - - # training - training = parser.add_argument_group('training setup') - training.add_argument('--epochs', type=int, required=True, - help='Number of total epochs to run') - training.add_argument('--epochs-per-checkpoint', type=int, default=50, - help='Number of epochs per checkpoint') - training.add_argument('--checkpoint-path', type=str, default='', - help='Checkpoint path to resume training') - training.add_argument('--resume-from-last', action='store_true', - help='Resumes training from the last checkpoint; uses the directory provided with \'--output\' option to search for the checkpoint \"checkpoint__last.pt\"') - training.add_argument('--dynamic-loss-scaling', type=bool, default=True, - help='Enable dynamic loss scaling') - training.add_argument('--amp', action='store_true', - help='Enable AMP') - training.add_argument('--cudnn-enabled', action='store_true', - help='Enable cudnn') - training.add_argument('--cudnn-benchmark', action='store_true', - help='Run cudnn benchmark') - training.add_argument('--disable-uniform-initialize-bn-weight', action='store_true', - help='disable uniform initialization of batchnorm layer weight') - - optimization = parser.add_argument_group('optimization setup') - optimization.add_argument( - '--use-saved-learning-rate', default=False, type=bool) - optimization.add_argument('-lr', 
'--learning-rate', type=float, required=True, - help='Learing rate') - optimization.add_argument('--weight-decay', default=1e-6, type=float, - help='Weight decay') - optimization.add_argument('--grad-clip-thresh', default=1.0, type=float, - help='Clip threshold for gradients') - optimization.add_argument('-bs', '--batch-size', type=int, required=True, - help='Batch size per GPU') - optimization.add_argument('--grad-clip', default=5.0, type=float, - help='Enables gradient clipping and sets maximum gradient norm value') - - # dataset parameters - dataset = parser.add_argument_group('dataset parameters') - dataset.add_argument('--load-mel-from-disk', action='store_true', - help='Loads mel spectrograms from disk instead of computing them on the fly') - dataset.add_argument('--training-files', - default='filelists/ljs_audio_text_train_filelist.txt', - type=str, help='Path to training filelist') - dataset.add_argument('--validation-files', - default='filelists/ljs_audio_text_val_filelist.txt', - type=str, help='Path to validation filelist') - dataset.add_argument('--text-cleaners', nargs='*', - default=['english_cleaners'], type=str, - help='Type of text cleaners for input text') - - # audio parameters - audio = parser.add_argument_group('audio parameters') - audio.add_argument('--max-wav-value', default=32768.0, type=float, - help='Maximum audiowave value') - audio.add_argument('--sampling-rate', default=22050, type=int, - help='Sampling rate') - audio.add_argument('--filter-length', default=1024, type=int, - help='Filter length') - audio.add_argument('--hop-length', default=256, type=int, - help='Hop (stride) length') - audio.add_argument('--win-length', default=1024, type=int, - help='Window length') - audio.add_argument('--mel-fmin', default=0.0, type=float, - help='Minimum mel frequency') - audio.add_argument('--mel-fmax', default=8000.0, type=float, - help='Maximum mel frequency') - - distributed = parser.add_argument_group('distributed setup') - # 
distributed.add_argument('--distributed-run', default=True, type=bool, - # help='enable distributed run') - distributed.add_argument('--rank', default=0, type=int, - help='Rank of the process, do not set! Done by multiproc module') - distributed.add_argument('--world-size', default=1, type=int, - help='Number of processes, do not set! Done by multiproc module') - distributed.add_argument('--dist-url', type=str, default='tcp://localhost:23456', - help='Url used to set up distributed training') - distributed.add_argument('--group-name', type=str, default='group_name', - required=False, help='Distributed group name') - distributed.add_argument('--dist-backend', default='nccl', type=str, choices={'nccl'}, - help='Distributed run backend') - - benchmark = parser.add_argument_group('benchmark') - benchmark.add_argument('--bench-class', type=str, default='') - - return parser - - -def reduce_tensor(tensor, num_gpus): - rt = tensor.clone() - dist.all_reduce(rt, op=dist.reduce_op.SUM) - rt /= num_gpus - return rt - - -def init_distributed(args, world_size, rank, group_name): - assert torch.cuda.is_available(), "Distributed mode requires CUDA." - print("Initializing Distributed") - - # Set cuda device so everything is done on the right GPU. 
- torch.cuda.set_device(rank % torch.cuda.device_count()) - - # Initialize distributed communication - dist.init_process_group( - backend=args.dist_backend, init_method=args.dist_url, - world_size=world_size, rank=rank, group_name=group_name) - - print("Done initializing distributed") - - -def save_checkpoint(model, optimizer, epoch, config, amp_run, output_dir, model_name, - local_rank, world_size): - - random_rng_state = torch.random.get_rng_state().cuda() - cuda_rng_state = torch.cuda.get_rng_state(local_rank).cuda() - - random_rng_states_all = [torch.empty_like(random_rng_state) for _ in range(world_size)] - cuda_rng_states_all = [torch.empty_like(cuda_rng_state) for _ in range(world_size)] - - if world_size > 1: - dist.all_gather(random_rng_states_all, random_rng_state) - dist.all_gather(cuda_rng_states_all, cuda_rng_state) - else: - random_rng_states_all = [random_rng_state] - cuda_rng_states_all = [cuda_rng_state] - - random_rng_states_all = torch.stack(random_rng_states_all).cpu() - cuda_rng_states_all = torch.stack(cuda_rng_states_all).cpu() - - if local_rank == 0: - checkpoint = {'epoch': epoch, - 'cuda_rng_state_all': cuda_rng_states_all, - 'random_rng_states_all': random_rng_states_all, - 'config': config, - 'state_dict': model.state_dict(), - 'optimizer': optimizer.state_dict()} - if amp_run: - checkpoint['amp'] = amp.state_dict() - - checkpoint_filename = "checkpoint_{}_{}.pt".format(model_name, epoch) - checkpoint_path = os.path.join( - output_dir, checkpoint_filename) - print("Saving model and optimizer state at epoch {} to {}".format( - epoch, checkpoint_path)) - torch.save(checkpoint, checkpoint_path) - - symlink_src = checkpoint_filename - symlink_dst = os.path.join( - output_dir, "checkpoint_{}_last.pt".format(model_name)) - if os.path.exists(symlink_dst) and os.path.islink(symlink_dst): - print("|||| Updating symlink", symlink_dst, "to point to", symlink_src) - os.remove(symlink_dst) - - os.symlink(symlink_src, symlink_dst) - - -def 
get_last_checkpoint_filename(output_dir, model_name): - symlink = os.path.join(output_dir, "checkpoint_{}_last.pt".format(model_name)) - if os.path.exists(symlink): - print("|||| Loading checkpoint from symlink", symlink) - return os.path.join(output_dir, os.readlink(symlink)) - else: - print("|||| No last checkpoint available - starting from epoch 0 ") - return "" - - -def load_checkpoint(model, optimizer, epoch, config, amp_run, filepath, local_rank): - - checkpoint = torch.load(filepath, map_location='cpu') - - epoch[0] = checkpoint['epoch']+1 - device_id = local_rank % torch.cuda.device_count() - torch.cuda.set_rng_state(checkpoint['cuda_rng_state_all'][device_id]) - torch.random.set_rng_state(checkpoint['random_rng_states_all'][device_id]) - config = checkpoint['config'] - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - - if amp_run: - amp.load_state_dict(checkpoint['amp']) - - -# adapted from: https://discuss.pytorch.org/t/opinion-eval-should-be-a-context-manager/18998/3 -# Following snippet is licensed under MIT license - -@contextmanager -def evaluating(model): - '''Temporarily switch to evaluation mode.''' - istrain = model.training - try: - model.eval() - yield model - finally: - if istrain: - model.train() - - -def validate(model, criterion, valset, epoch, batch_iter, batch_size, - world_size, collate_fn, distributed_run, rank, batch_to_gpu): - """Handles all the validation scoring and printing""" - with evaluating(model), torch.no_grad(): - val_sampler = DistributedSampler(valset) if distributed_run else None - val_loader = DataLoader(valset, num_workers=1, shuffle=False, - sampler=val_sampler, - batch_size=batch_size, pin_memory=False, - collate_fn=collate_fn) - - val_loss = 0.0 - num_iters = 0 - val_items_per_sec = 0.0 - for i, batch in enumerate(val_loader): - torch.cuda.synchronize() - iter_start_time = time.perf_counter() - - x, y, num_items = batch_to_gpu(batch) - y_pred = model(x) - loss = 
criterion(y_pred, y) - if distributed_run: - reduced_val_loss = reduce_tensor(loss.data, world_size).item() - reduced_num_items = reduce_tensor(num_items.data, 1).item() - else: # - reduced_val_loss = loss.item() - reduced_num_items = num_items.item() - val_loss += reduced_val_loss - - torch.cuda.synchronize() - iter_stop_time = time.perf_counter() - iter_time = iter_stop_time - iter_start_time - - items_per_sec = reduced_num_items/iter_time - DLLogger.log(step=(epoch, batch_iter, i), data={'val_items_per_sec': items_per_sec}) - val_items_per_sec += items_per_sec - num_iters += 1 - - val_loss = val_loss/(i + 1) - - DLLogger.log(step=(epoch,), data={'val_loss': val_loss}) - DLLogger.log(step=(epoch,), data={'val_items_per_sec': - (val_items_per_sec/num_iters if num_iters > 0 else 0.0)}) - - return val_loss - -def adjust_learning_rate(iteration, epoch, optimizer, learning_rate, - anneal_steps, anneal_factor, rank): - - p = 0 - if anneal_steps is not None: - for i, a_step in enumerate(anneal_steps): - if epoch >= int(a_step): - p = p+1 - - if anneal_factor == 0.3: - lr = learning_rate*((0.1 ** (p//2))*(1.0 if p % 2 == 0 else 0.3)) - else: - lr = learning_rate*(anneal_factor ** p) - - if optimizer.param_groups[0]['lr'] != lr: - DLLogger.log(step=(epoch, iteration), data={'learning_rate changed': str(optimizer.param_groups[0]['lr'])+" -> "+str(lr)}) - - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def main(): - - parser = argparse.ArgumentParser(description='PyTorch Tacotron 2 Training') - parser = parse_args(parser) - args, _ = parser.parse_known_args() - - if 'LOCAL_RANK' in os.environ and 'WORLD_SIZE' in os.environ: - local_rank = int(os.environ['LOCAL_RANK']) - world_size = int(os.environ['WORLD_SIZE']) - else: - local_rank = args.rank - world_size = args.world_size - - distributed_run = world_size > 1 - - if local_rank == 0: - DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, - args.output+'/'+args.log_file), - 
StdOutBackend(Verbosity.VERBOSE)]) - else: - DLLogger.init(backends=[]) - - for k,v in vars(args).items(): - DLLogger.log(step="PARAMETER", data={k:v}) - DLLogger.log(step="PARAMETER", data={'model_name':'Tacotron2_PyT'}) - - model_name = args.model_name - parser = models.parse_model_args(model_name, parser) - args, _ = parser.parse_known_args() - - torch.backends.cudnn.enabled = args.cudnn_enabled - torch.backends.cudnn.benchmark = args.cudnn_benchmark - - if distributed_run: - init_distributed(args, world_size, local_rank, args.group_name) - - torch.cuda.synchronize() - run_start_time = time.perf_counter() - - model_config = models.get_model_config(model_name, args) - model = models.get_model(model_name, model_config, - to_cuda=True, - uniform_initialize_bn_weight=not args.disable_uniform_initialize_bn_weight) - - if not args.amp and distributed_run: - model = DDP(model) - - optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, - weight_decay=args.weight_decay) - - if args.amp: - model, optimizer = amp.initialize(model, optimizer, opt_level="O1") - if distributed_run: - model = DDP(model) - - try: - sigma = args.sigma - except AttributeError: - sigma = None - - start_epoch = [0] - - if args.resume_from_last: - args.checkpoint_path = get_last_checkpoint_filename(args.output, model_name) - - if args.checkpoint_path is not "": - load_checkpoint(model, optimizer, start_epoch, model_config, - args.amp, args.checkpoint_path, local_rank) - - start_epoch = start_epoch[0] - - criterion = loss_functions.get_loss_function(model_name, sigma) - - try: - n_frames_per_step = args.n_frames_per_step - except AttributeError: - n_frames_per_step = None - - collate_fn = data_functions.get_collate_function( - model_name, n_frames_per_step) - trainset = data_functions.get_data_loader( - model_name, args.dataset_path, args.training_files, args) - if distributed_run: - train_sampler = DistributedSampler(trainset) - shuffle = False - else: - train_sampler = None - 
shuffle = True - - train_loader = DataLoader(trainset, num_workers=1, shuffle=shuffle, - sampler=train_sampler, - batch_size=args.batch_size, pin_memory=False, - drop_last=True, collate_fn=collate_fn) - - valset = data_functions.get_data_loader( - model_name, args.dataset_path, args.validation_files, args) - - batch_to_gpu = data_functions.get_batch_to_gpu(model_name) - - iteration = 0 - train_epoch_items_per_sec = 0.0 - val_loss = 0.0 - num_iters = 0 - - model.train() - - for epoch in range(start_epoch, args.epochs): - torch.cuda.synchronize() - epoch_start_time = time.perf_counter() - # used to calculate avg items/sec over epoch - reduced_num_items_epoch = 0 - - train_epoch_items_per_sec = 0.0 - - num_iters = 0 - reduced_loss = 0 - - # if overflow at the last iteration then do not save checkpoint - overflow = False - - if distributed_run: - train_loader.sampler.set_epoch(epoch) - - for i, batch in enumerate(train_loader): - torch.cuda.synchronize() - iter_start_time = time.perf_counter() - DLLogger.log(step=(epoch, i), - data={'glob_iter/iters_per_epoch': str(iteration)+"/"+str(len(train_loader))}) - - adjust_learning_rate(iteration, epoch, optimizer, args.learning_rate, - args.anneal_steps, args.anneal_factor, local_rank) - - model.zero_grad() - x, y, num_items = batch_to_gpu(batch) - - y_pred = model(x) - loss = criterion(y_pred, y) - - if distributed_run: - reduced_loss = reduce_tensor(loss.data, world_size).item() - reduced_num_items = reduce_tensor(num_items.data, 1).item() - else: - reduced_loss = loss.item() - reduced_num_items = num_items.item() - if np.isnan(reduced_loss): - raise Exception("loss is NaN") - - DLLogger.log(step=(epoch,i), data={'train_loss': reduced_loss}) - - num_iters += 1 - - # accumulate number of items processed in this epoch - reduced_num_items_epoch += reduced_num_items - - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - grad_norm = torch.nn.utils.clip_grad_norm_( - 
amp.master_params(optimizer), args.grad_clip_thresh) - else: - loss.backward() - grad_norm = torch.nn.utils.clip_grad_norm_( - model.parameters(), args.grad_clip_thresh) - - optimizer.step() - - torch.cuda.synchronize() - iter_stop_time = time.perf_counter() - iter_time = iter_stop_time - iter_start_time - items_per_sec = reduced_num_items/iter_time - train_epoch_items_per_sec += items_per_sec - - DLLogger.log(step=(epoch, i), data={'train_items_per_sec': items_per_sec}) - DLLogger.log(step=(epoch, i), data={'train_iter_time': iter_time}) - iteration += 1 - - torch.cuda.synchronize() - epoch_stop_time = time.perf_counter() - epoch_time = epoch_stop_time - epoch_start_time - - DLLogger.log(step=(epoch,), data={'train_items_per_sec': - (train_epoch_items_per_sec/num_iters if num_iters > 0 else 0.0)}) - DLLogger.log(step=(epoch,), data={'train_loss': reduced_loss}) - DLLogger.log(step=(epoch,), data={'train_epoch_time': epoch_time}) - - val_loss = validate(model, criterion, valset, epoch, iteration, - args.batch_size, world_size, collate_fn, - distributed_run, local_rank, batch_to_gpu) - - if (epoch % args.epochs_per_checkpoint == 0) and args.bench_class == "": - save_checkpoint(model, optimizer, epoch, model_config, - args.amp, args.output, args.model_name, - local_rank, world_size) - if local_rank == 0: - DLLogger.flush() - - torch.cuda.synchronize() - run_stop_time = time.perf_counter() - run_time = run_stop_time - run_start_time - DLLogger.log(step=tuple(), data={'run_time': run_time}) - DLLogger.log(step=tuple(), data={'val_loss': val_loss}) - DLLogger.log(step=tuple(), data={'train_items_per_sec': - (train_epoch_items_per_sec/num_iters if num_iters > 0 else 0.0)}) - - if local_rank == 0: - DLLogger.flush() - -if __name__ == '__main__': - main() diff --git a/demo/Tacotron2/waveglow/arg_parser.py b/demo/Tacotron2/waveglow/arg_parser.py deleted file mode 100644 index 7002bf6d..00000000 --- a/demo/Tacotron2/waveglow/arg_parser.py +++ /dev/null @@ -1,55 +0,0 @@ -# -# 
SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse - -def parse_waveglow_args(parent, add_help=False): - """ - Parse commandline arguments. - """ - parser = argparse.ArgumentParser(parents=[parent], add_help=add_help) - - # misc parameters - parser.add_argument('--n-mel-channels', default=80, type=int, - help='Number of bins in mel-spectrograms') - - # glow parameters - parser.add_argument('--flows', default=12, type=int, - help='Number of steps of flow') - parser.add_argument('--groups', default=8, type=int, - help='Number of samples in a group processed by the steps of flow') - parser.add_argument('--early-every', default=4, type=int, - help='Determines how often (i.e., after how many coupling layers) \ - a number of channels (defined by --early-size parameter) are output\ - to the loss function') - parser.add_argument('--early-size', default=2, type=int, - help='Number of channels output to the loss function') - parser.add_argument('--sigma', default=1.0, type=float, - help='Standard deviation used for sampling from Gaussian') - parser.add_argument('--segment-length', default=4000, type=int, - help='Segment length (audio samples) processed per iteration') - - # wavenet parameters - wavenet = parser.add_argument_group('WaveNet parameters') - wavenet.add_argument('--wn-kernel-size', default=3, type=int, - 
help='Kernel size for dialted convolution in the affine coupling layer (WN)') - wavenet.add_argument('--wn-channels', default=512, type=int, - help='Number of channels in WN') - wavenet.add_argument('--wn-layers', default=8, type=int, - help='Number of layers in WN') - - return parser diff --git a/demo/Tacotron2/waveglow/data_function.py b/demo/Tacotron2/waveglow/data_function.py deleted file mode 100644 index 62076eba..00000000 --- a/demo/Tacotron2/waveglow/data_function.py +++ /dev/null @@ -1,78 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import torch -import random -import common.layers as layers -from common.utils import load_wav_to_torch, load_filepaths_and_text, to_gpu - - -class MelAudioLoader(torch.utils.data.Dataset): - """ - 1) loads audio,text pairs - 2) computes mel-spectrograms from audio files. 
- """ - - def __init__(self, dataset_path, audiopaths_and_text, args): - self.audiopaths_and_text = load_filepaths_and_text(dataset_path, audiopaths_and_text) - self.max_wav_value = args.max_wav_value - self.sampling_rate = args.sampling_rate - self.stft = layers.TacotronSTFT( - args.filter_length, args.hop_length, args.win_length, - args.n_mel_channels, args.sampling_rate, args.mel_fmin, - args.mel_fmax) - self.segment_length = args.segment_length - random.seed(1234) - random.shuffle(self.audiopaths_and_text) - - def get_mel_audio_pair(self, filename): - audio, sampling_rate = load_wav_to_torch(filename) - - if sampling_rate != self.stft.sampling_rate: - raise ValueError("{} {} SR doesn't match target {} SR".format( - sampling_rate, self.stft.sampling_rate)) - - # Take segment - if audio.size(0) >= self.segment_length: - max_audio_start = audio.size(0) - self.segment_length - audio_start = random.randint(0, max_audio_start) - audio = audio[audio_start:audio_start+self.segment_length] - else: - audio = torch.nn.functional.pad( - audio, (0, self.segment_length - audio.size(0)), 'constant').data - - audio = audio / self.max_wav_value - audio_norm = audio.unsqueeze(0) - audio_norm = torch.autograd.Variable(audio_norm, requires_grad=False) - melspec = self.stft.mel_spectrogram(audio_norm) - melspec = melspec.squeeze(0) - - return (melspec, audio, len(audio)) - - def __getitem__(self, index): - return self.get_mel_audio_pair(self.audiopaths_and_text[index][0]) - - def __len__(self): - return len(self.audiopaths_and_text) - - -def batch_to_gpu(batch): - x, y, len_y = batch - x = to_gpu(x).float() - y = to_gpu(y).float() - len_y = to_gpu(torch.sum(len_y)) - return ((x, y), y, len_y) diff --git a/demo/Tacotron2/waveglow/denoiser.py b/demo/Tacotron2/waveglow/denoiser.py deleted file mode 100644 index 5dc2d789..00000000 --- a/demo/Tacotron2/waveglow/denoiser.py +++ /dev/null @@ -1,53 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & 
AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import sys -sys.path.append('tacotron2') -import torch -from common.layers import STFT - - -class Denoiser(torch.nn.Module): - """ Removes model bias from audio produced with waveglow """ - - def __init__(self, waveglow, filter_length=1024, n_overlap=4, - win_length=1024, mode='zeros'): - super(Denoiser, self).__init__() - device = waveglow.upsample.weight.device - dtype = waveglow.upsample.weight.dtype - self.stft = STFT(filter_length=filter_length, - hop_length=int(filter_length/n_overlap), - win_length=win_length).to(device) - if mode == 'zeros': - mel_input = torch.zeros((1, 80, 88), dtype=dtype, device=device) - elif mode == 'normal': - mel_input = torch.randn((1, 80, 88), dtype=dtype, device=device) - else: - raise Exception("Mode {} if not supported".format(mode)) - - with torch.no_grad(): - bias_audio = waveglow.infer(mel_input, sigma=0.0).float() - bias_spec, _ = self.stft.transform(bias_audio) - - self.register_buffer('bias_spec', bias_spec[:, :, 0][:, :, None]) - - def forward(self, audio, strength=0.1): - audio_spec, audio_angles = self.stft.transform(audio) - audio_spec_denoised = audio_spec - self.bias_spec * strength - audio_spec_denoised = torch.clamp(audio_spec_denoised, 0.0) - audio_denoised = self.stft.inverse(audio_spec_denoised, audio_angles) - return audio_denoised diff --git a/demo/Tacotron2/waveglow/loss_function.py 
b/demo/Tacotron2/waveglow/loss_function.py deleted file mode 100644 index 75620df9..00000000 --- a/demo/Tacotron2/waveglow/loss_function.py +++ /dev/null @@ -1,38 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import torch - -class WaveGlowLoss(torch.nn.Module): - def __init__(self, sigma=1.0): - super(WaveGlowLoss, self).__init__() - self.sigma = sigma - - def forward(self, model_output, clean_audio): - # clean_audio is unused; - z, log_s_list, log_det_W_list = model_output - for i, log_s in enumerate(log_s_list): - if i == 0: - log_s_total = torch.sum(log_s) - log_det_W_total = log_det_W_list[i] - else: - log_s_total = log_s_total + torch.sum(log_s) - log_det_W_total += log_det_W_list[i] - - loss = torch.sum( - z * z) / (2 * self.sigma * self.sigma) - log_s_total - log_det_W_total # noqa: E501 - return loss / (z.size(0) * z.size(1) * z.size(2)) diff --git a/demo/Tacotron2/waveglow/model.py b/demo/Tacotron2/waveglow/model.py deleted file mode 100644 index 00a26421..00000000 --- a/demo/Tacotron2/waveglow/model.py +++ /dev/null @@ -1,343 +0,0 @@ -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import torch -from torch.autograd import Variable -import torch.nn.functional as F -import numpy as np - - -@torch.jit.script -def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels): - n_channels_int = n_channels[0] - in_act = input_a + input_b - t_act = torch.tanh(in_act[:, :n_channels_int, :]) - s_act = torch.sigmoid(in_act[:, n_channels_int:, :]) - acts = t_act * s_act - return acts - - -class Invertible1x1Conv(torch.nn.Module): - """ - The layer outputs both the convolution, and the log determinant - of its weight matrix. 
If reverse=True it does convolution with - inverse - """ - - def __init__(self, c): - super(Invertible1x1Conv, self).__init__() - self.conv = torch.nn.Conv1d(c, c, kernel_size=1, stride=1, padding=0, - bias=False) - - # Sample a random orthonormal matrix to initialize weights - W = torch.qr(torch.FloatTensor(c, c).normal_())[0] - - # Ensure determinant is 1.0 not -1.0 - if torch.det(W) < 0: - W[:, 0] = -1 * W[:, 0] - W = W.view(c, c, 1) - W = W.contiguous() - self.conv.weight.data = W - - def forward(self, z): - # shape - batch_size, group_size, n_of_groups = z.size() - - W = self.conv.weight.squeeze() - - # Forward computation - log_det_W = batch_size * n_of_groups * torch.logdet(W.unsqueeze(0).float()).squeeze() - z = self.conv(z) - return z, log_det_W - - - def infer(self, z): - # shape - batch_size, group_size, n_of_groups = z.size() - - W = self.conv.weight.squeeze() - - if not hasattr(self, 'W_inverse'): - # Reverse computation - W_inverse = W.float().inverse() - W_inverse = Variable(W_inverse[..., None]) - if z.type() == 'torch.cuda.HalfTensor' or z.type() == 'torch.HalfTensor': - W_inverse = W_inverse.half() - self.W_inverse = W_inverse - z = F.conv1d(z, self.W_inverse, bias=None, stride=1, padding=0) - return z - - -class WN(torch.nn.Module): - """ - This is the WaveNet like layer for the affine coupling. The primary - difference from WaveNet is the convolutions need not be causal. There is - also no dilation size reset. 
The dilation only doubles on each layer - """ - - def __init__(self, n_in_channels, n_mel_channels, n_layers, n_channels, - kernel_size): - super(WN, self).__init__() - assert(kernel_size % 2 == 1) - assert(n_channels % 2 == 0) - self.n_layers = n_layers - self.n_channels = n_channels - self.in_layers = torch.nn.ModuleList() - self.res_skip_layers = torch.nn.ModuleList() - self.cond_layers = torch.nn.ModuleList() - - start = torch.nn.Conv1d(n_in_channels, n_channels, 1) - start = torch.nn.utils.weight_norm(start, name='weight') - self.start = start - - # Initializing last layer to 0 makes the affine coupling layers - # do nothing at first. This helps with training stability - end = torch.nn.Conv1d(n_channels, 2 * n_in_channels, 1) - end.weight.data.zero_() - end.bias.data.zero_() - self.end = end - - for i in range(n_layers): - dilation = 2 ** i - padding = int((kernel_size * dilation - dilation) / 2) - in_layer = torch.nn.Conv1d(n_channels, 2 * n_channels, kernel_size, - dilation=dilation, padding=padding) - in_layer = torch.nn.utils.weight_norm(in_layer, name='weight') - self.in_layers.append(in_layer) - - cond_layer = torch.nn.Conv1d(n_mel_channels, 2 * n_channels, 1) - cond_layer = torch.nn.utils.weight_norm(cond_layer, name='weight') - self.cond_layers.append(cond_layer) - - # last one is not necessary - if i < n_layers - 1: - res_skip_channels = 2 * n_channels - else: - res_skip_channels = n_channels - res_skip_layer = torch.nn.Conv1d(n_channels, res_skip_channels, 1) - res_skip_layer = torch.nn.utils.weight_norm( - res_skip_layer, name='weight') - self.res_skip_layers.append(res_skip_layer) - - def forward(self, forward_input): - audio, spect = forward_input - audio = self.start(audio) - - for i in range(self.n_layers): - acts = fused_add_tanh_sigmoid_multiply( - self.in_layers[i](audio), - self.cond_layers[i](spect), - torch.IntTensor([self.n_channels])) - - res_skip_acts = self.res_skip_layers[i](acts) - if i < self.n_layers - 1: - audio = res_skip_acts[:, 
:self.n_channels, :] + audio - skip_acts = res_skip_acts[:, self.n_channels:, :] - else: - skip_acts = res_skip_acts - - if i == 0: - output = skip_acts - else: - output = skip_acts + output - return self.end(output) - - -class WaveGlow(torch.nn.Module): - def __init__(self, n_mel_channels, n_flows, n_group, n_early_every, - n_early_size, WN_config): - super(WaveGlow, self).__init__() - - self.upsample = torch.nn.ConvTranspose1d(n_mel_channels, - n_mel_channels, - 1024, stride=256) - assert(n_group % 2 == 0) - self.n_flows = n_flows - self.n_group = n_group - self.n_early_every = n_early_every - self.n_early_size = n_early_size - self.WN = torch.nn.ModuleList() - self.convinv = torch.nn.ModuleList() - - n_half = int(n_group / 2) - - # Set up layers with the right sizes based on how many dimensions - # have been output already - n_remaining_channels = n_group - for k in range(n_flows): - if k % self.n_early_every == 0 and k > 0: - n_half = n_half - int(self.n_early_size / 2) - n_remaining_channels = n_remaining_channels - self.n_early_size - self.convinv.append(Invertible1x1Conv(n_remaining_channels)) - self.WN.append(WN(n_half, n_mel_channels * n_group, **WN_config)) - self.n_remaining_channels = n_remaining_channels - - def forward(self, forward_input): - """ - forward_input[0] = mel_spectrogram: batch x n_mel_channels x frames - forward_input[1] = audio: batch x time - """ - spect, audio = forward_input - - # Upsample spectrogram to size of audio - spect = self.upsample(spect) - assert(spect.size(2) >= audio.size(1)) - if spect.size(2) > audio.size(1): - spect = spect[:, :, :audio.size(1)] - - spect = spect.unfold(2, self.n_group, self.n_group).permute(0, 2, 1, 3) - spect = spect.contiguous().view(spect.size(0), spect.size(1), -1) - spect = spect.permute(0, 2, 1) - - audio = audio.unfold(1, self.n_group, self.n_group).permute(0, 2, 1) - output_audio = [] - log_s_list = [] - log_det_W_list = [] - - for k in range(self.n_flows): - if k % self.n_early_every == 0 and 
k > 0: - output_audio.append(audio[:, :self.n_early_size, :]) - audio = audio[:, self.n_early_size:, :] - - audio, log_det_W = self.convinv[k](audio) - log_det_W_list.append(log_det_W) - - n_half = int(audio.size(1) / 2) - audio_0 = audio[:, :n_half, :] - audio_1 = audio[:, n_half:, :] - - output = self.WN[k]((audio_0, spect)) - log_s = output[:, n_half:, :] - b = output[:, :n_half, :] - audio_1 = torch.exp(log_s) * audio_1 + b - log_s_list.append(log_s) - - audio = torch.cat([audio_0, audio_1], 1) - - output_audio.append(audio) - return torch.cat(output_audio, 1), log_s_list, log_det_W_list - - def infer(self, spect, sigma=1.0): - - spect = self.upsample(spect) - # trim conv artifacts. maybe pad spec to kernel multiple - time_cutoff = self.upsample.kernel_size[0] - self.upsample.stride[0] - spect = spect[:, :, :-time_cutoff] - - spect = spect.unfold(2, self.n_group, self.n_group).permute(0, 2, 1, 3) - spect = spect.contiguous().view(spect.size(0), spect.size(1), -1) - spect = spect.permute(0, 2, 1) - - audio = torch.randn(spect.size(0), - self.n_remaining_channels, - spect.size(2), device=spect.device).to(spect.dtype) - - audio = torch.autograd.Variable(sigma * audio) - - for k in reversed(range(self.n_flows)): - n_half = int(audio.size(1) / 2) - audio_0 = audio[:, :n_half, :] - audio_1 = audio[:, n_half:, :] - - output = self.WN[k]((audio_0, spect)) - s = output[:, n_half:, :] - b = output[:, :n_half, :] - audio_1 = (audio_1 - b) / torch.exp(s) - audio = torch.cat([audio_0, audio_1], 1) - - audio = self.convinv[k].infer(audio) - - if k % self.n_early_every == 0 and k > 0: - z = torch.randn(spect.size(0), self.n_early_size, spect.size( - 2), device=spect.device).to(spect.dtype) - audio = torch.cat((sigma * z, audio), 1) - - audio = audio.permute( - 0, 2, 1).contiguous().view( - audio.size(0), -1).data - return audio - - - def infer_onnx(self, spect, z, sigma=0.9): - - spect = self.upsample(spect) - # trim conv artifacts. 
maybe pad spec to kernel multiple - time_cutoff = self.upsample.kernel_size[0] - self.upsample.stride[0] - spect = spect[:, :, :-time_cutoff] - - length_spect_group = spect.size(2)//8 - mel_dim = 80 - batch_size = spect.size(0) - - spect = torch.squeeze(spect, 3) - spect = spect.view((batch_size, mel_dim, length_spect_group, self.n_group)) - spect = spect.permute(0, 2, 1, 3) - spect = spect.contiguous() - spect = spect.view((batch_size, length_spect_group, self.n_group*mel_dim)) - spect = spect.permute(0, 2, 1) - spect = torch.unsqueeze(spect, 3) - spect = spect.contiguous() - - audio = z[:, :self.n_remaining_channels, :, :] - z = z[:, self.n_remaining_channels:self.n_group, :, :] - - # Convert sigma to a torch tensor to ensure constant is exported properly - if audio.type() == 'torch.cuda.HalfTensor' or audio.type() == 'torch.HalfTensor': - sigma = torch.tensor(np.float16(sigma)) - else: - sigma = torch.tensor(np.float32(sigma)) - audio = sigma * audio - - for k in reversed(range(self.n_flows)): - n_half = int(audio.size(1) // 2) - audio_0 = audio[:, :n_half, :, :] - audio_1 = audio[:, n_half:(n_half+n_half), :, :] - - output = self.WN[k]((audio_0, spect)) - s = output[:, n_half:(n_half+n_half), :, :] - b = output[:, :n_half, :, :] - audio_1 = (audio_1 - b) / torch.exp(s) - audio = torch.cat([audio_0, audio_1], 1) - audio = self.convinv[k](audio) - - if k % self.n_early_every == 0 and k > 0: - audio = torch.cat((z[:, :self.n_early_size, :, :], audio), 1) - z = z[:, self.n_early_size:self.n_group, :, :] - - audio = torch.squeeze(audio, 3) - audio = audio.permute(0,2,1).contiguous().view(batch_size, (length_spect_group * self.n_group)) - - return audio - - - @staticmethod - def remove_weightnorm(model): - waveglow = model - for WN in waveglow.WN: - WN.start = torch.nn.utils.remove_weight_norm(WN.start) - WN.in_layers = remove(WN.in_layers) - WN.cond_layers = remove(WN.cond_layers) - WN.res_skip_layers = remove(WN.res_skip_layers) - return waveglow - - -def 
remove(conv_list): - new_conv_list = torch.nn.ModuleList() - for old_conv in conv_list: - old_conv = torch.nn.utils.remove_weight_norm(old_conv) - new_conv_list.append(old_conv) - return new_conv_list diff --git a/demo/experimental/HuggingFace-Diffusers/README.md b/demo/experimental/HuggingFace-Diffusers/README.md deleted file mode 100644 index d0e4e563..00000000 --- a/demo/experimental/HuggingFace-Diffusers/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# Introduction - -This demo notebook showcases the acceleration of Stable Diffusion pipeline using TensorRT through HuggingFace pipelines. - -# Setup - -### Clone the TensorRT OSS repository - -```bash -git clone git@github.com:NVIDIA/TensorRT.git -b release/9.3 --single-branch -cd TensorRT/demo/experimental/HuggingFace-Diffusers -``` - -### Launch TensorRT NGC container - -Install nvidia-docker using [these intructions](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker). Launch the docker container with the following command: - -```bash -docker run --rm -it --gpus all -p 8888:8888 -v $PWD:/workspace nvcr.io/nvidia/tensorrt:23.04-py3 /bin/bash -``` - -### Run Jupyter Notebook - -Install `jupyter` with: - -```bash -pip install jupyter -``` - -Launch the notebook within the container with: - -```bash -jupyter notebook --ip 0.0.0.0 TensorRT-diffusers-txt2img.ipynb --allow-root --no-browser -``` - -Follow the console output for the link to run the notebook on your host machine. 
diff --git a/demo/experimental/HuggingFace-Diffusers/TensorRT-diffusers-txt2img.ipynb b/demo/experimental/HuggingFace-Diffusers/TensorRT-diffusers-txt2img.ipynb deleted file mode 100644 index 23eb1492..00000000 --- a/demo/experimental/HuggingFace-Diffusers/TensorRT-diffusers-txt2img.ipynb +++ /dev/null @@ -1,1290 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "14941611", - "metadata": {}, - "source": [ - "# Stable Diffusion acceleration with TensorRT" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47c80a60", - "metadata": {}, - "outputs": [], - "source": [ - "# Copyright 2023 NVIDIA Corporation. All Rights Reserved.\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# http://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License.\n", - "# ==============================================================================" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "7a9c6d74", - "metadata": {}, - "source": [ - "# Install Prerequisites" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "b32d847b", - "metadata": {}, - "outputs": [], - "source": [ - "# Disable warnings if pip is run as root.\n", - "import os\n", - "os.environ['PIP_ROOT_USER_ACTION']='ignore'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "cd9e73ba", - "metadata": {}, - "outputs": [], - "source": [ - "!python -m pip install --upgrade --quiet pip" - ] - }, - { - "attachments": 
{}, - "cell_type": "markdown", - "id": "d0214ad4", - "metadata": {}, - "source": [ - "### Check NVIDIA GPU availability\n", - "\n", - "TensorRT acceleration for Diffusion models is available for NVIDIA Turing, Ampere, Ada Lovelace, and Hopper GPUs.\n", - "\n", - "For the following illustration we are using an A100 40GB GPU." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "362193c2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Wed May 3 04:32:55 2023 \n", - "+-----------------------------------------------------------------------------+\n", - "| NVIDIA-SMI 515.44 Driver Version: 515.44 CUDA Version: 12.0 |\n", - "|-------------------------------+----------------------+----------------------+\n", - "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", - "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", - "| | | MIG M. |\n", - "|===============================+======================+======================|\n", - "| 0 NVIDIA Graphics... 
Off | 00000000:01:00.0 Off | 0 |\n", - "| 65% 64C P0 81W / 200W | 86MiB / 40960MiB | 0% Default |\n", - "| | | Disabled |\n", - "+-------------------------------+----------------------+----------------------+\n", - " \n", - "+-----------------------------------------------------------------------------+\n", - "| Processes: |\n", - "| GPU GI CI PID Type Process name GPU Memory |\n", - "| ID ID Usage |\n", - "|=============================================================================|\n", - "+-----------------------------------------------------------------------------+\n" - ] - } - ], - "source": [ - "!nvidia-smi" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c79b497a", - "metadata": {}, - "source": [ - "### Install PyTorch 1.x\n", - "\n", - "NOTE: this is a temporary workaround for ONNX export issues observed in PyTorch 2.0," - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cabe1586", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install --upgrade --quiet \"torch <2.0.0\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "f07ee31c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PyTorch version: 1.14.0a0+44dac51\n" - ] - } - ], - "source": [ - "import torch\n", - "print(f\"PyTorch version: {torch.__version__}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f26c0286", - "metadata": {}, - "source": [ - "### Install NVIDIA TensorRT\n", - "\n", - "TensorRT 8.6+ includes Stable Diffusion model optimizations out of the box." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "1e5b96f2", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install --upgrade --quiet \"tensorrt>=8.6\"" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "34a83eb3", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TensorRT version: 8.6.1\n" - ] - } - ], - "source": [ - "import tensorrt\n", - "print(f\"TensorRT version: {tensorrt.__version__}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "3a14e192", - "metadata": {}, - "source": [ - "### Install TensorRT Utilities\n", - "\n", - "The TensorRT pipeline implementation in diffusers uses `polygraphy` API to reduce boilerplate code and simplify deployment of ONNX models in TensorRT.\n", - "\n", - "The pipeline also uses `onnx-graphsurgeon` and `onnxruntime` to sanitize (constant folding & shape inference) the exported ONNX models for deployment." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "465c891a", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install --extra-index-url https://pypi.ngc.nvidia.com --upgrade --quiet \"onnx-graphsurgeon\" \"onnxruntime\" \"polygraphy\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "3d157e2d", - "metadata": {}, - "source": [ - "### Install HuggingFace libraries\n", - "\n", - "HuggingFace `diffusers` library provides an implementation of the Stable Diffusion pipeline, including the constituent models. TensorRT txt2img pipeline was added in `diffusers` v0.16.0, which is a minimum requirement for the following illustration.\n", - "\n", - "The OpenAI CLIP text encoder and tokenizer models are obtained from HuggingFace `transformers` package." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "2c8f24c9", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install --upgrade --quiet \"accelerate\" \"diffusers>=0.16\" \"transformers\"" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "eef75c7f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "diffusers version: 0.16.1\n" - ] - } - ], - "source": [ - "import diffusers\n", - "print(f\"diffusers version: {diffusers.__version__}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "7ee62e33", - "metadata": {}, - "source": [ - "# Run Stable Diffusion\n", - "\n", - "The Stable Diffusion text2image pipeline takes a text prompt as an input and generates an image. A latent seed is used generate an initial random latent of size 64×64 and the text prompt is transformed to text embeddings of size 77×768 by a CLIP text encoder.\n", - "\n", - "Next the U-Net iteratively denoises the random latent representation over a user-specified number of steps while being conditioned on the text embeddings. The output of the U-Net in each iteration is a noise residual which is transformed into denoised latent image representation via a scheduler algorithm.\n", - "\n", - "For more information, see this [blog post](https://huggingface.co/blog/stable_diffusion)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "6892fdee", - "metadata": {}, - "source": [ - "### Import SD pipeline from diffusers\n", - "\n", - "`StableDiffusionPipeline` contains all models required for inference - a tokenizer, `CLIPTextModel` (text encoder), `UNet2DConditionModel` (denoising UNet), and `AutoencoderKL` (VAE decoder)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "7d3abfe8", - "metadata": {}, - "outputs": [], - "source": [ - "from diffusers.pipelines.stable_diffusion import StableDiffusionPipeline" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d68630b1", - "metadata": {}, - "source": [ - "### Initialize DDIM scheduler\n", - "\n", - "A custom noise scheduler can be specified by the user. In our example we use [DDIM](https://huggingface.co/docs/diffusers/main/en/api/schedulers/ddim)." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "8c0df48e", - "metadata": {}, - "outputs": [], - "source": [ - "from diffusers import DDIMScheduler\n", - "scheduler = DDIMScheduler.from_pretrained(\"stabilityai/stable-diffusion-2-1\", subfolder=\"scheduler\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "12fbcdc7", - "metadata": {}, - "source": [ - "### Initialize native txt2img pipeline" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "0e81860f", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e84d2ea17a5247fea357a7499fbc9cc3", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Fetching 11 files: 0%| | 0/11 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "prompt = \"a beautiful photograph of Mt. 
Fuji during cherry blossom\"\n", - "image = pipe(prompt).images[0]\n", - "display(image)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "1b22dd3c", - "metadata": {}, - "source": [ - "# Run Stable Diffusion with TensorRT" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "709ed5d5", - "metadata": {}, - "source": [ - "### Initialize TensorRT txt2img pipeline\n", - "\n", - "TensorRT pipeline initialization is similar to the native pipeline, with a single extra option to specify the path to a [python file containing the TensorRT implementation](https://github.com/huggingface/diffusers/blob/main/examples/community/stable_diffusion_tensorrt_txt2img.py) in diffusers.\n", - "`custom_pipeline=\"stable_diffusion_tensorrt_txt2img\"`" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "fbd7f7a8", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/huggingface_hub/file_download.py:649: FutureWarning: 'cached_download' is the legacy way to download files from the HF hub, please consider upgrading to 'hf_hub_download'\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "pipe_trt = StableDiffusionPipeline.from_pretrained(\n", - " \"stabilityai/stable-diffusion-2-1\",\n", - " custom_pipeline=\"stable_diffusion_tensorrt_txt2img\",\n", - " revision='fp16',\n", - " torch_dtype=torch.float16,\n", - " scheduler=scheduler,\n", - " image_height=512,\n", - " image_width=512)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "4e7e6c2e", - "metadata": {}, - "source": [ - "### Specify cache folder name\n", - "\n", - "The ONNX models and TensorRT engines generated during the first inference run will be cached in this folder to speed up subsequent runs." 
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "9d018680", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b0caad71f89a45ceb6e6f790bfc28f71", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Fetching 16 files: 0%| | 0/16 [00:00= 64:\n", - "/usr/local/lib/python3.8/dist-packages/diffusers/models/unet_2d_condition.py:793: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", - " if not return_dict:\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "========== Diagnostic Run torch.onnx.export version 1.14.0a0+44dac51 ===========\n", - "verbose: False, log level: Level.ERROR\n", - "======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Generating optimizing model: /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/onnx/unet.opt.onnx\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Folding Constants | Pass 1\n", - "[I] Total Nodes | Original: 7757, After Folding: 5379 | 2378 Nodes Folded\n", - "[I] Folding Constants | Pass 2\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-03 04:35:05.063804462 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.2/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.063835442 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.2/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.063851254 
[W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.2/Unsqueeze_6\n", - "2023-05-03 04:35:05.063860151 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.2/Unsqueeze_2\n", - "2023-05-03 04:35:05.063874574 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.1/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.063885250 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.1/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.063899247 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.1/Unsqueeze_6\n", - "2023-05-03 04:35:05.063907147 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.1/Unsqueeze_2\n", - "2023-05-03 04:35:05.063921177 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.0/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.063931750 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.0/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.063945862 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.0/Unsqueeze_6\n", - "2023-05-03 04:35:05.063953915 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.0/Unsqueeze_2\n", - "2023-05-03 04:35:05.063968177 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.2/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.063979539 [W:onnxruntime:, 
unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.2/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.063993087 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.2/Unsqueeze_6\n", - "2023-05-03 04:35:05.064000857 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.2/Unsqueeze_2\n", - "2023-05-03 04:35:05.064014245 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.1/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.064024708 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.1/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.064038411 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.1/Unsqueeze_6\n", - "2023-05-03 04:35:05.064046432 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.1/Unsqueeze_2\n", - "2023-05-03 04:35:05.064060263 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.0/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.064070926 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.0/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.064084326 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.0/Unsqueeze_6\n", - "2023-05-03 04:35:05.064092365 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.0/Unsqueeze_2\n", - "2023-05-03 04:35:05.064105810 [W:onnxruntime:, 
unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.2/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.064116164 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.2/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.064132918 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.2/Unsqueeze_6\n", - "2023-05-03 04:35:05.064140796 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.2/Unsqueeze_2\n", - "2023-05-03 04:35:05.064154446 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.1/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.064165905 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.1/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.064179209 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.1/Unsqueeze_6\n", - "2023-05-03 04:35:05.064187342 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.1/Unsqueeze_2\n", - "2023-05-03 04:35:05.064201199 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.0/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.064211740 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.0/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.064225424 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.0/Unsqueeze_6\n", - "2023-05-03 04:35:05.064233327 
[W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.0/Unsqueeze_2\n", - "2023-05-03 04:35:05.064247287 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /mid_block/attentions.0/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.064257595 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /mid_block/attentions.0/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.064270903 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /mid_block/attentions.0/Unsqueeze_6\n", - "2023-05-03 04:35:05.064279133 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /mid_block/attentions.0/Unsqueeze_2\n", - "2023-05-03 04:35:05.064293283 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.1/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.064303776 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.1/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.064317285 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.1/Unsqueeze_6\n", - "2023-05-03 04:35:05.064325039 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.1/Unsqueeze_2\n", - "2023-05-03 04:35:05.064339012 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.0/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.064349129 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.0/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 
04:35:05.064361976 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.0/Unsqueeze_6\n", - "2023-05-03 04:35:05.064369583 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.0/Unsqueeze_2\n", - "2023-05-03 04:35:05.064383610 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.1/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.064394922 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.1/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.064407734 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.1/Unsqueeze_6\n", - "2023-05-03 04:35:05.064415531 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.1/Unsqueeze_2\n", - "2023-05-03 04:35:05.064429270 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.0/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.064439251 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.0/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.064452187 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.0/Unsqueeze_6\n", - "2023-05-03 04:35:05.064459693 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.0/Unsqueeze_2\n", - "2023-05-03 04:35:05.064473401 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.1/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 
04:35:05.064483399 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.1/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.064495818 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.1/Unsqueeze_6\n", - "2023-05-03 04:35:05.064505291 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.1/Unsqueeze_2\n", - "2023-05-03 04:35:05.064519225 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.0/transformer_blocks.0/attn2/Unsqueeze_23\n", - "2023-05-03 04:35:05.064529037 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.0/transformer_blocks.0/attn1/Unsqueeze_23\n", - "2023-05-03 04:35:05.064541710 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.0/Unsqueeze_6\n", - "2023-05-03 04:35:05.064549728 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.0/Unsqueeze_2\n", - "2023-05-03 04:35:05.064556692 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.2/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064563536 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.1/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064570257 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.0/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064576986 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node 
/up_blocks.2/attentions.2/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064583887 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.1/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064590714 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.0/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064597349 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.2/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064605340 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.1/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064612284 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.0/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064619173 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /mid_block/attentions.0/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064626141 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.1/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064633046 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.0/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064639818 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.1/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064646608 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node 
/down_blocks.1/attentions.0/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064653372 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.1/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064660184 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.0/transformer_blocks.0/attn2/Unsqueeze_20\n", - "2023-05-03 04:35:05.064719945 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.2/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.064730107 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.2/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.064737080 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.2/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.064744647 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.2/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.064751229 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.1/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.064758661 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.1/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.064765408 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.1/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.064772857 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node 
/up_blocks.3/attentions.1/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.064779651 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.0/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.064787086 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.0/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.064793967 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.0/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.064801286 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.3/attentions.0/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.064808977 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.2/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.064816235 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.2/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.064822982 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.2/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.064830202 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.2/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.064836835 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.1/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.064844097 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node 
/up_blocks.2/attentions.1/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.064850828 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.1/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.064858220 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.1/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.064864754 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.0/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.064872009 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.0/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.064878486 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.0/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.064885621 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.2/attentions.0/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.064892385 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.2/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.064899754 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.2/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.064906377 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.2/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.064913714 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node 
/up_blocks.1/attentions.2/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.064920540 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.1/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.064927841 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.1/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.064934645 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.1/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.064941859 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.1/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.064948485 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.0/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.064956392 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.0/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.064963043 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.0/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.064970411 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /up_blocks.1/attentions.0/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.064977125 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /mid_block/attentions.0/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.064984371 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node 
/mid_block/attentions.0/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.064990976 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /mid_block/attentions.0/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.064998100 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /mid_block/attentions.0/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.065005010 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.1/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.065012308 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.1/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.065018874 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.1/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.065026019 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.1/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.065032681 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.0/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.065039872 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.0/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.065046731 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.2/attentions.0/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.065086682 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node 
/down_blocks.2/attentions.0/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.065095119 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.1/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.065102513 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.1/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.065109193 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.1/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.065116469 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.1/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.065123211 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.0/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.065130426 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.0/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.065138081 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.0/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.065145644 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.1/attentions.0/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.065152308 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.1/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.065159575 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node 
/down_blocks.0/attentions.1/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.065166147 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.1/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.065173277 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.1/transformer_blocks.0/attn2/Unsqueeze_7\n", - "2023-05-03 04:35:05.065179792 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.0/transformer_blocks.0/attn2/Unsqueeze_16\n", - "2023-05-03 04:35:05.065187044 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.0/transformer_blocks.0/attn2/Unsqueeze_13\n", - "2023-05-03 04:35:05.065193654 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.0/transformer_blocks.0/attn2/Unsqueeze_10\n", - "2023-05-03 04:35:05.065200911 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /down_blocks.0/attentions.0/transformer_blocks.0/attn2/Unsqueeze_7\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Total Nodes | Original: 5379, After Folding: 4208 | 1171 Nodes Folded\n", - "[I] Folding Constants | Pass 3\n", - "[I] Total Nodes | Original: 4208, After Folding: 4208 | 0 Nodes Folded\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Building Engines...\n", - "Engine build can take a while to complete\n", - "Exporting model: /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/onnx/vae.onnx\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "========== Diagnostic Run torch.onnx.export version 1.14.0a0+44dac51 ===========\n", - "verbose: 
False, log level: Level.ERROR\n", - "======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Generating optimizing model: /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/onnx/vae.opt.onnx\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Folding Constants | Pass 1\n", - "[I] Total Nodes | Original: 671, After Folding: 500 | 171 Nodes Folded\n", - "[I] Folding Constants | Pass 2\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-03 04:35:36.443555280 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /decoder/mid_block/attentions.0/Unsqueeze_29\n", - "2023-05-03 04:35:36.443582656 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /decoder/mid_block/attentions.0/Unsqueeze_26\n", - "2023-05-03 04:35:36.443597966 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /decoder/mid_block/attentions.0/Unsqueeze_31\n", - "2023-05-03 04:35:36.443606789 [W:onnxruntime:, unsqueeze_elimination.cc:20 Apply] UnsqueezeElimination cannot remove node /decoder/mid_block/attentions.0/Unsqueeze_1\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Total Nodes | Original: 500, After Folding: 471 | 29 Nodes Folded\n", - "[I] Folding Constants | Pass 3\n", - "[I] Total Nodes | Original: 471, After Folding: 471 | 0 Nodes Folded\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Building TensorRT engine for /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/onnx/clip.opt.onnx: 
/root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/clip.plan\n", - "[libprotobuf WARNING google/protobuf/io/coded_stream.cc:604] Reading dangerously large protocol message. If the message turns out to be larger than 2147483647 bytes, parsing will be halted for security reasons. To increase the limit (or to disable these warnings), see CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.\n", - "[libprotobuf WARNING google/protobuf/io/coded_stream.cc:81] The total number of bytes read was 681566094\n", - "[libprotobuf WARNING google/protobuf/io/coded_stream.cc:604] Reading dangerously large protocol message. If the message turns out to be larger than 2147483647 bytes, parsing will be halted for security reasons. To increase the limit (or to disable these warnings), see CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.\n", - "[libprotobuf WARNING google/protobuf/io/coded_stream.cc:81] The total number of bytes read was 681566094\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[W] onnx2trt_utils.cpp:374: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. 
Attempting to cast down to INT32.\n", - "[I] Configuring with profiles: [Profile().add('input_ids', min=(1, 77), opt=(1, 77), max=(4, 77))]\n", - "[I] Loading tactic timing cache from /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/timing_cache\n", - "[W] Timing cache file /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/timing_cache not found, falling back to empty timing cache.\n", - "[I] Building engine with configuration:\n", - " Flags | [FP16]\n", - " Engine Capability | EngineCapability.DEFAULT\n", - " Memory Pools | [WORKSPACE: 40535.88 MiB, TACTIC_DRAM: 40535.88 MiB]\n", - " Tactic Sources | []\n", - " Profiling Verbosity | ProfilingVerbosity.DETAILED\n", - " Preview Features | [DISABLE_EXTERNAL_TACTIC_SOURCES_FOR_CORE_0805]\n", - "[W] kFASTER_DYNAMIC_SHAPES_0805 preview feature is disabled.\n", - "[W] TensorRT encountered issues when converting weights between types and that could affect accuracy.\n", - "[W] If this is not the desired behavior, please modify the weights or retrain with regularization to adjust the magnitude of the weights.\n", - "[W] Check verbose logs for the list of affected weights.\n", - "[W] - 225 weights are affected by this issue: Detected subnormal FP16 values.\n", - "[I] Finished engine building in 146.532 seconds\n", - "[I] Saving tactic timing cache to /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/timing_cache\n", - "[I] Saving engine to /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/clip.plan\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Building TensorRT engine for 
/root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/onnx/unet.opt.onnx: /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/unet.plan\n", - "[libprotobuf WARNING google/protobuf/io/coded_stream.cc:604] Reading dangerously large protocol message. If the message turns out to be larger than 2147483647 bytes, parsing will be halted for security reasons. To increase the limit (or to disable these warnings), see CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.\n", - "[libprotobuf WARNING google/protobuf/io/coded_stream.cc:81] The total number of bytes read was 1733934759\n", - "[libprotobuf WARNING google/protobuf/io/coded_stream.cc:604] Reading dangerously large protocol message. If the message turns out to be larger than 2147483647 bytes, parsing will be halted for security reasons. To increase the limit (or to disable these warnings), see CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.\n", - "[libprotobuf WARNING google/protobuf/io/coded_stream.cc:81] The total number of bytes read was 1733934759\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[W] onnx2trt_utils.cpp:400: One or more weights outside the range of INT32 was clamped\n", - "[I] Configuring with profiles: [Profile().add('sample', min=(2, 4, 96, 96), opt=(2, 4, 96, 96), max=(8, 4, 96, 96)).add('encoder_hidden_states', min=(2, 77, 1024), opt=(2, 77, 1024), max=(8, 77, 1024)).add('timestep', min=[1], opt=[1], max=[1])]\n", - "[I] Loading tactic timing cache from /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/timing_cache\n", - "[I] Building engine with configuration:\n", - " Flags | [FP16]\n", - " Engine Capability | EngineCapability.DEFAULT\n", - " Memory Pools | [WORKSPACE: 40535.88 MiB, TACTIC_DRAM: 
40535.88 MiB]\n", - " Tactic Sources | []\n", - " Profiling Verbosity | ProfilingVerbosity.DETAILED\n", - " Preview Features | [DISABLE_EXTERNAL_TACTIC_SOURCES_FOR_CORE_0805]\n", - "[W] - 272 weights are affected by this issue: Detected subnormal FP16 values.\n", - "[I] Finished engine building in 1032.233 seconds\n", - "[I] Saving tactic timing cache to /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/timing_cache\n", - "[I] Saving engine to /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/unet.plan\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Building TensorRT engine for /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/onnx/vae.opt.onnx: /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/vae.plan\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Configuring with profiles: [Profile().add('latent', min=(1, 4, 96, 96), opt=(1, 4, 96, 96), max=(4, 4, 96, 96))]\n", - "[I] Loading tactic timing cache from /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/timing_cache\n", - "[I] Building engine with configuration:\n", - " Flags | [FP16]\n", - " Engine Capability | EngineCapability.DEFAULT\n", - " Memory Pools | [WORKSPACE: 40535.88 MiB, TACTIC_DRAM: 40535.88 MiB]\n", - " Tactic Sources | []\n", - " Profiling Verbosity | ProfilingVerbosity.DETAILED\n", - " Preview Features | [DISABLE_EXTERNAL_TACTIC_SOURCES_FOR_CORE_0805]\n", - "[W] - 4 weights are affected by this issue: Detected subnormal FP16 values.\n", - "[I] Finished engine building in 204.808 seconds\n", - "[I] Saving tactic timing cache to 
/root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/timing_cache\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading TensorRT engine: /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/clip.plan\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Saving engine to /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/vae.plan\n", - "[I] Loading bytes from /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/clip.plan\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading TensorRT engine: /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/unet.plan\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Loading bytes from /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/unet.plan\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading TensorRT engine: /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/vae.plan\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Loading bytes from /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/vae.plan\n" - ] - } - ], - "source": [ - "pipe_trt = pipe_trt.to(\"cuda\")" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "c7defb86", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running 
inference on device: cuda:0\n", - "Loading TensorRT engine: /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/clip.plan\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Loading bytes from /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/clip.plan\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading TensorRT engine: /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/unet.plan\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Loading bytes from /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/unet.plan\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading TensorRT engine: /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/vae.plan\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Loading bytes from /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1/snapshots/f7f33030acc57428be85fbec092c37a78231d75a/engine/vae.plan\n" - ] - } - ], - "source": [ - "pipe_trt = pipe_trt.to(\"cuda\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2bdd0eaa", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/root/.cache/huggingface/modules/diffusers_modules/git/stable_diffusion_tensorrt_txt2img.py:907: FutureWarning: Accessing config attribute `in_channels` directly via 'UNet2DConditionModel' object attribute is deprecated. Please access 'in_channels' over 'UNet2DConditionModel's config object instead, e.g. 
'unet.config.in_channels'.\n", - " num_channels_latents = self.unet.in_channels\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "prompt = \"a beautiful photograph of Mt. Fuji during cherry blossom\"\n", - "\n", - "# warm up runs to stabilize performance benchmarking\n", - "num_warm_up_steps=5\n", - "for _ in range(num_warm_up_steps):\n", - " _ = pipe_trt(prompt)\n", - "\n", - "image = pipe_trt(prompt).images[0]\n", - "display(image)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docker/build.sh b/docker/build.sh index b24029ae..33f52f55 100755 --- a/docker/build.sh +++ b/docker/build.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/docker/launch.sh b/docker/launch.sh index 2fe9d299..c1b5d05d 100755 --- a/docker/launch.sh +++ b/docker/launch.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/docker/rockylinux8.Dockerfile b/docker/rockylinux8.Dockerfile new file mode 100644 index 00000000..dca7208c --- /dev/null +++ b/docker/rockylinux8.Dockerfile @@ -0,0 +1,105 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +ARG CUDA_VERSION=12.4.0 + +FROM nvidia/cuda:${CUDA_VERSION}-devel-rockylinux8 +LABEL maintainer="NVIDIA CORPORATION" + +ENV CUDA_VERSION_MAJOR_MINOR=12.2 +ENV NV_CUDNN_VERSION 8.9.6.50-1 +ENV NV_CUDNN_PACKAGE libcudnn8-${NV_CUDNN_VERSION}.cuda12.2 +ENV NV_CUDNN_PACKAGE_DEV libcudnn8-devel-${NV_CUDNN_VERSION}.cuda12.2 + +ENV TRT_VERSION 10.0.1.6 +SHELL ["/bin/bash", "-c"] + +RUN dnf install -y \ + ${NV_CUDNN_PACKAGE} \ + ${NV_CUDNN_PACKAGE_DEV} \ + && dnf clean all \ + && rm -rf /var/cache/dnf/* + +# Setup user account +ARG uid=1000 +ARG gid=1000 +RUN groupadd -r -f -g ${gid} trtuser && useradd -o -r -l -u ${uid} -g ${gid} -ms /bin/bash trtuser +RUN usermod -aG wheel trtuser +RUN echo 'trtuser:nvidia' | chpasswd +RUN mkdir -p /workspace && chown trtuser /workspace + +# Install required packages +RUN dnf -y groupinstall "Development Tools" +RUN dnf -y install \ + openssl-devel \ + bzip2-devel \ + libffi-devel \ + wget \ + perl-core \ + git \ + pkg-config \ + unzip \ + sudo + +#
Install python3 +RUN dnf install -y python38 python38-devel &&\ + cd /usr/bin && ln -s /usr/bin/pip3.8 pip; + + +# Install TensorRT +RUN if [ "${CUDA_VERSION:0:2}" = "11" ]; then \ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib64 \ + && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp38-none-linux_x86_64.whl ;\ +elif [ "${CUDA_VERSION:0:2}" = "12" ]; then \ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib64 \ + && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp38-none-linux_x86_64.whl ;\ +else \ + echo "Invalid CUDA_VERSION"; \ + exit 1; \ +fi + +# Install PyPI packages +RUN pip install --upgrade pip +RUN pip install "setuptools>=41.0.0" +RUN pip install numpy +RUN pip install jupyter jupyterlab + +# Install Cmake +RUN cd /tmp && \ + wget https://github.com/Kitware/CMake/releases/download/v3.14.4/cmake-3.14.4-Linux-x86_64.sh && \ + chmod +x cmake-3.14.4-Linux-x86_64.sh && \ + ./cmake-3.14.4-Linux-x86_64.sh --prefix=/usr/local --exclude-subdir --skip-license && \ + rm ./cmake-3.14.4-Linux-x86_64.sh + +# Download NGC client +RUN cd /usr/local/bin && wget https://ngc.nvidia.com/downloads/ngccli_cat_linux.zip && unzip ngccli_cat_linux.zip && chmod u+x ngc-cli/ngc && rm ngccli_cat_linux.zip ngc-cli.md5 && echo "no-apikey\nascii\n" | ngc-cli/ngc config set + +RUN ln -s /usr/bin/python3 /usr/bin/python + +# Set environment and working directory +ENV TRT_LIBPATH /usr/lib64 +ENV TRT_OSSPATH /workspace/TensorRT +ENV PATH="${PATH}:/usr/local/bin/ngc-cli" +ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${TRT_OSSPATH}/build/out:${TRT_LIBPATH}" +WORKDIR
/workspace + +USER trtuser +RUN ["/bin/bash"] diff --git a/docker/rockylinux9.Dockerfile b/docker/rockylinux9.Dockerfile new file mode 100644 index 00000000..ff00512a --- /dev/null +++ b/docker/rockylinux9.Dockerfile @@ -0,0 +1,104 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +ARG CUDA_VERSION=12.4.0 + +FROM nvidia/cuda:${CUDA_VERSION}-devel-rockylinux9 +LABEL maintainer="NVIDIA CORPORATION" + +ENV CUDA_VERSION_MAJOR_MINOR=12.2 +ENV NV_CUDNN_VERSION 8.9.6.50-1 +ENV NV_CUDNN_PACKAGE libcudnn8-${NV_CUDNN_VERSION}.cuda12.2 +ENV NV_CUDNN_PACKAGE_DEV libcudnn8-devel-${NV_CUDNN_VERSION}.cuda12.2 + +ENV TRT_VERSION 10.0.1.6 +SHELL ["/bin/bash", "-c"] + +RUN dnf install -y \ + ${NV_CUDNN_PACKAGE} \ + ${NV_CUDNN_PACKAGE_DEV} \ + && dnf clean all \ + && rm -rf /var/cache/dnf/* + +# Setup user account +ARG uid=1000 +ARG gid=1000 +RUN groupadd -r -f -g ${gid} trtuser && useradd -o -r -l -u ${uid} -g ${gid} -ms /bin/bash trtuser +RUN usermod -aG wheel trtuser +RUN echo 'trtuser:nvidia' | chpasswd +RUN mkdir -p /workspace && chown trtuser /workspace + +# Install python3 +RUN dnf install -y python39 python3-devel && \ + cd /usr/bin && rm pip && ln -s /usr/bin/pip3.9 pip; + +# Install PyPI packages +RUN pip install --upgrade pip +RUN pip install "setuptools>=41.0.0" +RUN pip install numpy +RUN pip install jupyter jupyterlab + +#
Install required packages +RUN dnf -y groupinstall "Development Tools" +RUN dnf -y install \ + openssl-devel \ + bzip2-devel \ + libffi-devel \ + wget \ + perl-core \ + git \ + pkg-config \ + unzip \ + sudo + +# Install TensorRT +RUN if [ "${CUDA_VERSION:0:2}" = "11" ]; then \ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib64 \ + && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp39-none-linux_x86_64.whl ;\ +elif [ "${CUDA_VERSION:0:2}" = "12" ]; then \ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib64 \ + && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp39-none-linux_x86_64.whl ;\ +else \ + echo "Invalid CUDA_VERSION"; \ + exit 1; \ +fi + +# Install Cmake +RUN cd /tmp && \ + wget https://github.com/Kitware/CMake/releases/download/v3.14.4/cmake-3.14.4-Linux-x86_64.sh && \ + chmod +x cmake-3.14.4-Linux-x86_64.sh && \ + ./cmake-3.14.4-Linux-x86_64.sh --prefix=/usr/local --exclude-subdir --skip-license && \ + rm ./cmake-3.14.4-Linux-x86_64.sh + +# Download NGC client +RUN cd /usr/local/bin && wget https://ngc.nvidia.com/downloads/ngccli_cat_linux.zip && unzip ngccli_cat_linux.zip && chmod u+x ngc-cli/ngc && rm ngccli_cat_linux.zip ngc-cli.md5 && echo "no-apikey\nascii\n" | ngc-cli/ngc config set + +RUN ln -s /usr/bin/python3 /usr/bin/python + +# Set environment and working directory +ENV TRT_LIBPATH /usr/lib64 +ENV TRT_OSSPATH /workspace/TensorRT +ENV PATH="${PATH}:/usr/local/bin/ngc-cli" +ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${TRT_OSSPATH}/build/out:${TRT_LIBPATH}" +WORKDIR /workspace + +USER trtuser +RUN ["/bin/bash"] diff --git
a/docker/ubuntu-20.04.Dockerfile b/docker/ubuntu-20.04.Dockerfile index 0049d4c2..7498c124 100644 --- a/docker/ubuntu-20.04.Dockerfile +++ b/docker/ubuntu-20.04.Dockerfile @@ -15,7 +15,7 @@ # limitations under the License. # -ARG CUDA_VERSION=12.3.2 +ARG CUDA_VERSION=12.4.0 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 LABEL maintainer="NVIDIA CORPORATION" @@ -28,7 +28,7 @@ ENV CUDA_VERSION_MAJOR_MINOR=12.2 ENV NV_CUDNN_PACKAGE "libcudnn8=$NV_CUDNN_VERSION-1+cuda${CUDA_VERSION_MAJOR_MINOR}" ENV NV_CUDNN_PACKAGE_DEV "libcudnn8-dev=$NV_CUDNN_VERSION-1+cuda${CUDA_VERSION_MAJOR_MINOR}" -ENV TRT_VERSION 10.0.0.6 +ENV TRT_VERSION 10.0.1.6 SHELL ["/bin/bash", "-c"] RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -84,15 +84,15 @@ RUN apt-get install -y --no-install-recommends \ # Install TensorRT RUN if [ "${CUDA_VERSION:0:2}" = "11" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/tars/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && tar -xf TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && cp -a TensorRT-10.0.0.6/lib/*.so* /usr/lib/x86_64-linux-gnu \ - && pip install TensorRT-10.0.0.6/python/tensorrt-10.0.0b6-cp38-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib/x86_64-linux-gnu \ + && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp38-none-linux_x86_64.whl ;\ elif [ "${CUDA_VERSION:0:2}" = "12" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/tars/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ - && tar -xf TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ - && cp -a TensorRT-10.0.0.6/lib/*.so* /usr/lib/x86_64-linux-gnu \ - && pip install 
TensorRT-10.0.0.6/python/tensorrt-10.0.0b6-cp38-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib/x86_64-linux-gnu \ + && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp38-none-linux_x86_64.whl ;\ else \ echo "Invalid CUDA_VERSION"; \ exit 1; \ diff --git a/docker/ubuntu-22.04-aarch64.Dockerfile b/docker/ubuntu-22.04-aarch64.Dockerfile new file mode 100644 index 00000000..ebac9297 --- /dev/null +++ b/docker/ubuntu-22.04-aarch64.Dockerfile @@ -0,0 +1,112 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +ARG CUDA_VERSION=12.4.0 + +# Multi-arch container support available in non-cudnn containers. 
+FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 + +ENV TRT_VERSION 10.0.1.6 +SHELL ["/bin/bash", "-c"] + +# Setup user account +ARG uid=1000 +ARG gid=1000 +RUN groupadd -r -f -g ${gid} trtuser && useradd -o -r -l -u ${uid} -g ${gid} -ms /bin/bash trtuser +RUN usermod -aG sudo trtuser +RUN echo 'trtuser:nvidia' | chpasswd +RUN mkdir -p /workspace && chown trtuser /workspace + +# Required to build Ubuntu 22.04 without user prompts with DLFW container +ENV DEBIAN_FRONTEND=noninteractive + +# Update CUDA signing key +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/sbsa/3bf863cc.pub + +# Install required libraries +RUN apt-get update && apt-get install -y software-properties-common +RUN add-apt-repository ppa:ubuntu-toolchain-r/test +RUN apt-get update && apt-get install -y --no-install-recommends \ + libcurl4-openssl-dev \ + wget \ + git \ + pkg-config \ + sudo \ + ssh \ + libssl-dev \ + pbzip2 \ + pv \ + bzip2 \ + unzip \ + devscripts \ + lintian \ + fakeroot \ + dh-make \ + build-essential + +# Install python3 +RUN apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + python3-dev \ + python3-wheel &&\ + cd /usr/local/bin &&\ + ln -s /usr/bin/python3 python &&\ + ln -s /usr/bin/pip3 pip; + +# Install TensorRT.
This will also pull in CUDNN +RUN ver="${CUDA_VERSION%.*}" &&\ + if [ "${ver%.*}" = "12" ] ; then \ + ver="12.4"; \ + fi &&\ + v="${TRT_VERSION}-1+cuda${ver}" &&\ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/sbsa/3bf863cc.pub &&\ + apt-get update &&\ + sudo apt-get -y install libnvinfer10=${v} libnvonnxparsers10=${v} libnvinfer-plugin10=${v} \ + libnvinfer-dev=${v} libnvonnxparsers-dev=${v} libnvinfer-plugin-dev=${v} \ + python3-libnvinfer=${v} libnvinfer-dispatch10=${v} libnvinfer-dispatch-dev=${v} libnvinfer-lean10=${v} \ + libnvinfer-lean-dev=${v} libnvinfer-vc-plugin10=${v} libnvinfer-vc-plugin-dev=${v} \ + libnvinfer-headers-dev=${v} libnvinfer-headers-plugin-dev=${v}; + +# Install Cmake +RUN cd /tmp && \ + wget https://github.com/Kitware/CMake/releases/download/v3.21.4/cmake-3.21.4-linux-aarch64.sh && \ + chmod +x cmake-3.21.4-linux-aarch64.sh && \ + ./cmake-3.21.4-linux-aarch64.sh --prefix=/usr/local --exclude-subdir --skip-license && \ + rm ./cmake-3.21.4-linux-aarch64.sh + +# Install PyPI packages +RUN pip3 install --upgrade pip +RUN pip3 install "setuptools>=41.0.0" +COPY requirements.txt /tmp/requirements.txt +RUN pip3 install -r /tmp/requirements.txt +RUN pip3 install jupyter jupyterlab +# Workaround to remove numpy installed with tensorflow +RUN pip3 install --upgrade numpy + +# Download NGC client +RUN cd /usr/local/bin && wget https://ngc.nvidia.com/downloads/ngccli_arm64.zip && unzip ngccli_arm64.zip && chmod u+x ngc-cli/ngc && rm ngccli_arm64.zip ngc-cli.md5 && echo "no-apikey\nascii\n" | ngc-cli/ngc config set + +# Set environment and working directory +ENV TRT_LIBPATH /usr/lib/aarch64-linux-gnu/ +ENV TRT_OSSPATH /workspace/TensorRT +ENV PATH="${PATH}:/usr/local/bin/ngc-cli" +ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${TRT_OSSPATH}/build/out:${TRT_LIBPATH}" +WORKDIR /workspace + +USER trtuser +RUN ["/bin/bash"] diff --git a/docker/ubuntu-22.04.Dockerfile b/docker/ubuntu-22.04.Dockerfile index
ebe90f71..a7e0d6a1 100644 --- a/docker/ubuntu-22.04.Dockerfile +++ b/docker/ubuntu-22.04.Dockerfile @@ -15,7 +15,7 @@ # limitations under the License. # -ARG CUDA_VERSION=12.3.2 +ARG CUDA_VERSION=12.4.0 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 LABEL maintainer="NVIDIA CORPORATION" @@ -28,7 +28,7 @@ ENV CUDA_VERSION_MAJOR_MINOR=12.2 ENV NV_CUDNN_PACKAGE "libcudnn8=$NV_CUDNN_VERSION-1+cuda${CUDA_VERSION_MAJOR_MINOR}" ENV NV_CUDNN_PACKAGE_DEV "libcudnn8-dev=$NV_CUDNN_VERSION-1+cuda${CUDA_VERSION_MAJOR_MINOR}" -ENV TRT_VERSION 10.0.0.6 +ENV TRT_VERSION 10.0.1.6 SHELL ["/bin/bash", "-c"] RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -49,7 +49,7 @@ RUN mkdir -p /workspace && chown trtuser /workspace ENV DEBIAN_FRONTEND=noninteractive # Update CUDA signing key -RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub # Install requried libraries RUN apt-get update && apt-get install -y software-properties-common @@ -84,15 +84,15 @@ RUN apt-get install -y --no-install-recommends \ # Install TensorRT RUN if [ "${CUDA_VERSION:0:2}" = "11" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/tars/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && tar -xf TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && cp -a TensorRT-10.0.0.6/lib/*.so* /usr/lib/x86_64-linux-gnu \ - && pip install TensorRT-10.0.0.6/python/tensorrt-10.0.0b6-cp310-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib/x86_64-linux-gnu \ + && pip install 
TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp310-none-linux_x86_64.whl ;\ elif [ "${CUDA_VERSION:0:2}" = "12" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/tars/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ - && tar -xf TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ - && cp -a TensorRT-10.0.0.6/lib/*.so* /usr/lib/x86_64-linux-gnu \ - && pip install TensorRT-10.0.0.6/python/tensorrt-10.0.0b6-cp310-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib/x86_64-linux-gnu \ + && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp310-none-linux_x86_64.whl ;\ else \ echo "Invalid CUDA_VERSION"; \ exit 1; \ diff --git a/docker/ubuntu-cross-aarch64.Dockerfile b/docker/ubuntu-cross-aarch64.Dockerfile new file mode 100644 index 00000000..eb2e100b --- /dev/null +++ b/docker/ubuntu-cross-aarch64.Dockerfile @@ -0,0 +1,134 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +ARG CUDA_VERSION=12.4.0 +ARG OS_VERSION=22.04 + +FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${OS_VERSION} +LABEL maintainer="NVIDIA CORPORATION" + +ENV TRT_VERSION 10.0.1.6 +ENV DEBIAN_FRONTEND=noninteractive + +ARG uid=1000 +ARG gid=1000 +RUN groupadd -r -f -g ${gid} trtuser && useradd -o -r -l -u ${uid} -g ${gid} -ms /bin/bash trtuser +RUN usermod -aG sudo trtuser +RUN echo 'trtuser:nvidia' | chpasswd +RUN mkdir -p /workspace && chown trtuser /workspace + +# Install required libraries +RUN apt-get update && apt-get install -y software-properties-common +RUN add-apt-repository ppa:ubuntu-toolchain-r/test +RUN apt-get update && apt-get install -y --no-install-recommends \ + libcurl4-openssl-dev \ + wget \ + git \ + pkg-config \ + python3 \ + python3-pip \ + python3-dev \ + python3-wheel \ + sudo \ + ssh \ + pbzip2 \ + pv \ + bzip2 \ + unzip \ + build-essential + +RUN cd /usr/local/bin &&\ + ln -s /usr/bin/python3 python &&\ + ln -s /usr/bin/pip3 pip +RUN pip3 install --upgrade pip +RUN pip3 install "setuptools>=41.0.0" + +# Install Cmake +RUN cd /tmp && \ + wget https://github.com/Kitware/CMake/releases/download/v3.14.4/cmake-3.14.4-Linux-x86_64.sh && \ + chmod +x cmake-3.14.4-Linux-x86_64.sh && \ + ./cmake-3.14.4-Linux-x86_64.sh --prefix=/usr/local --exclude-subdir --skip-license && \ + rm ./cmake-3.14.4-Linux-x86_64.sh + +# Skip installing PyPI packages and NGC client on cross-build container + +COPY docker/jetpack_files /pdk_files +COPY scripts/stubify.sh /pdk_files + +# Update CUDA signing keys +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub + +# Install CUDA cross compile toolchain +RUN dpkg -i /pdk_files/cuda-repo-cross-aarch64*.deb /pdk_files/cuda-repo-ubuntu*_amd64.deb \ + && sudo cp /var/cuda-repo-cross-aarch64*/cuda-*keyring.gpg /usr/share/keyrings/ \ + && sudo cp /var/cuda-repo-ubuntu2204*/cuda-*keyring.gpg /usr/share/keyrings/ \ + && apt-get update \ + && apt-get install -y
cuda-cross-aarch64 \ + && rm -rf /var/lib/apt/lists/* + +# Unpack cudnn +RUN dpkg -x /pdk_files/cudnn-local*.deb /pdk_files/cudnn_extract \ + && dpkg -x /pdk_files/cudnn_extract/var/cudnn-local*/libcudnn8_*.deb /pdk_files/cudnn \ + && dpkg -x /pdk_files/cudnn_extract/var/cudnn-local*/libcudnn8-dev*.deb /pdk_files/cudnn \ + && cd /pdk_files/cudnn/usr/lib/aarch64-linux-gnu \ + && cd /pdk_files/cudnn \ + && ln -s usr/include/aarch64-linux-gnu include \ + && ln -s usr/lib/aarch64-linux-gnu lib \ + && ln -s /pdk_files/cudnn/usr/include/aarch64-linux-gnu/cudnn_adv_infer_v[7-9].h /usr/include/cudnn_adv_infer.h \ + && ln -s /pdk_files/cudnn/usr/include/aarch64-linux-gnu/cudnn_adv_train_v[7-9].h /usr/include/cudnn_adv_train.h \ + && ln -s /pdk_files/cudnn/usr/include/aarch64-linux-gnu/cudnn_backend_v[7-9].h /usr/include/cudnn_backend.h \ + && ln -s /pdk_files/cudnn/usr/include/aarch64-linux-gnu/cudnn_cnn_infer_v[7-9].h /usr/include/cudnn_cnn_infer.h \ + && ln -s /pdk_files/cudnn/usr/include/aarch64-linux-gnu/cudnn_cnn_train_v[7-9].h /usr/include/cudnn_cnn_train.h \ + && ln -s /pdk_files/cudnn/usr/include/aarch64-linux-gnu/cudnn_ops_infer_v[7-9].h /usr/include/cudnn_ops_infer.h \ + && ln -s /pdk_files/cudnn/usr/include/aarch64-linux-gnu/cudnn_ops_train_v[7-9].h /usr/include/cudnn_ops_train.h \ + && ln -s /pdk_files/cudnn/usr/include/aarch64-linux-gnu/cudnn_v[7-9].h /usr/include/cudnn.h \ + && ln -s /pdk_files/cudnn/usr/include/aarch64-linux-gnu/cudnn_version_v[7-9].h /usr/include/cudnn_version.h + +# Unpack libnvinfer +RUN dpkg -x /pdk_files/libnvinfer10_*-1+cuda12.[0-9]_arm64.deb /pdk_files/tensorrt \ + && dpkg -x /pdk_files/libnvinfer-dev_*-1+cuda12.[0-9]_arm64.deb /pdk_files/tensorrt \ + && dpkg -x /pdk_files/libnvinfer-plugin10_*-1+cuda12.[0-9]_arm64.deb /pdk_files/tensorrt \ + && dpkg -x /pdk_files/libnvinfer-plugin-dev_*-1+cuda12.[0-9]_arm64.deb /pdk_files/tensorrt \ + && dpkg -x /pdk_files/libnvonnxparsers10_*-1+cuda12.[0-9]_arm64.deb /pdk_files/tensorrt \ + && dpkg 
-x /pdk_files/libnvonnxparsers-dev_*-1+cuda12.[0-9]_arm64.deb /pdk_files/tensorrt + +# Clean up debs +RUN rm -rf /pdk_files/*.deb + +# set up librt.so symlink +RUN ln -sf /usr/aarch64-linux-gnu/lib/librt.so.1 /usr/aarch64-linux-gnu/lib/librt.so +RUN ln -sf /usr/lib/aarch64-linux-gnu/librt.so.1 /usr/lib/aarch64-linux-gnu/librt.so + +# create stub libraries +RUN cd /pdk_files/tensorrt \ + && ln -s usr/include/aarch64-linux-gnu include \ + && ln -s usr/lib/aarch64-linux-gnu lib \ + && cd lib \ + && mkdir stubs \ + && for x in nvinfer nvparsers nvinfer_plugin nvonnxparser; \ + do \ + CC=aarch64-linux-gnu-gcc /pdk_files/stubify.sh lib${x}.so stubs/lib${x}.so \ + ; done + +# Set environment and working directory +ENV TRT_LIBPATH /pdk_files/tensorrt/lib +ENV TRT_OSSPATH /workspace/TensorRT +ENV IS_L4T_CROSS True +WORKDIR /workspace + +USER trtuser +RUN ["/bin/bash"] diff --git a/include/NvInfer.h b/include/NvInfer.h index 7fff86b1..c921ede0 100644 --- a/include/NvInfer.h +++ b/include/NvInfer.h @@ -1282,7 +1282,7 @@ class IConvolutionLayer : public ILayer //! //! If executing this layer on DLA, only support 2D padding, both height and width must be in the range [1,32]. //! - //! \see getDilation() + //! \see getDilationNd() //! void setDilationNd(Dims const& dilation) noexcept { @@ -1292,7 +1292,7 @@ class IConvolutionLayer : public ILayer //! //! \brief Get the multi-dimension dilation of the convolution. //! - //! \see setDilation() + //! \see setDilationNd() //! Dims getDilationNd() const noexcept { @@ -3716,10 +3716,9 @@ class IRaggedSoftMaxLayer : public ILayer //! Two types are compatible if they are identical, or are both in {kFLOAT, kHALF}. //! Implicit conversion between incompatible types, i.e. without using setOutputType, //! is recognized as incorrect as of TensorRT 8.4, but is retained for API compatibility -//! within TensorRT 8.x releases. In a future major release the behavior will change -//! 
to record an error if the network output tensor type is incompatible with the layer -//! output type. E.g., implicit conversion from kFLOAT to kINT32 will not be allowed, -//! and instead such a conversion will require calling setOutputType(DataType::kINT32). +//! within TensorRT 8.x releases. TensorRT 10.0 onwards it is an error if the network output tensor type is incompatible +//! with the layer output type. E.g., implicit conversion from kFLOAT to kINT32 is not allowed. Use +//! setOutputType(DataType::kINT32) to explicitly convert kFLOAT to kINT32. //! //! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. //! @@ -4343,6 +4342,14 @@ class ILoop; //! //! \brief This is a base class for Loop boundary layers. //! +//! The loop boundary layers are used to define loops within a network, enabling the implementation +//! of recurrences. The boundary layers for a loop are created by class ILoop. +//! +//! There are four kinds of boundary layers. +//! * ITripLimitLayer: controls the number of loop iterations. +//! * IIteratorLayer: iterates over an input tensor. +//! * IRecurrenceLayer: returns an initial value or value from the previous loop iteration. +//! * ILoopOutputLayer: generates an output tensor from the loop iterations. class ILoopBoundaryLayer : public ILayer { public: @@ -4526,6 +4533,8 @@ class IIfConditional : public INoCopy //! //! \brief A recurrence layer in a network definition. //! +//! The recurrence layer allows a loop iteration to compute a result from a value computed in the previous iteration. +//! class IRecurrenceLayer : public ILoopBoundaryLayer { public: @@ -4641,6 +4650,12 @@ class ILoopOutputLayer : public ILoopBoundaryLayer //! //! \brief A layer that represents a trip-count limiter. //! +//! The trip limit layer sets the execution condition for loops, using kCOUNT to define the number of iterations or +//! kWHILE for a conditional loop.
A loop can have one of each kind of limit, in which case the loop exits when +//! the trip count is reached or the condition becomes false. +//! +//! \see ILoop::addTripLimit() +//! class ITripLimitLayer : public ILoopBoundaryLayer { public: @@ -4662,6 +4677,11 @@ class ITripLimitLayer : public ILoopBoundaryLayer //! //! \brief A layer to do iterations. //! +//! The iterator layer iterates over a tensor along the given axis and in the given direction. +//! It enables each loop iteration to inspect a different slice of the tensor. +//! +//! \see ILoop::addIterator() +//! class IIteratorLayer : public ILoopBoundaryLayer { public: @@ -4715,6 +4735,10 @@ class IIteratorLayer : public ILoopBoundaryLayer //! //! \brief Helper for creating a recurrent subgraph. //! +//! An ILoop defines a loop within a network. It supports the implementation of recurrences, +//! which are crucial for iterative computations, such as RNNs for natural language processing and +//! time-series analysis. +//! class ILoop : public INoCopy { public: @@ -4809,7 +4833,12 @@ class ILoop : public INoCopy //! //! \class ISelectLayer //! -//! \brief A select layer in a network definition. +//! \brief Select elements from two data tensors based on a condition tensor. +//! +//! The select layer makes elementwise selections from two data tensors based on a condition tensor, +//! behaving similarly to the numpy.where function with three parameters. +//! The three input tensors must share the same rank. Multidirectional broadcasting is supported. +//! The output tensor has the dimensions of the inputs AFTER applying the broadcast rule. //! //! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. //! @@ -8361,13 +8390,16 @@ enum class MemoryPoolType : int32_t kTACTIC_DRAM = 4, //! - //! kTACTIC_SHARED_MEMORY defines the maximum shared memory size utilized for executing - //! the backend CUDA kernel implementation.
Adjust this value to restrict tactics that exceed - //! the specified threshold en masse. The default value is device max capability. This value must + //! kTACTIC_SHARED_MEMORY defines the maximum sum of shared memory reserved by the driver and + //! used for executing CUDA kernels. Adjust this value to restrict tactics that exceed the + //! specified threshold en masse. The default value is device max capability. This value must //! be less than 1GiB. //! + //! The driver reserved shared memory can be queried from cuDeviceGetAttribute(&reservedShmem, + //! CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK). + //! //! Updating this flag will override the shared memory limit set by \ref HardwareCompatibilityLevel, - //! which defaults to 48KiB. + //! which defaults to 48KiB - reservedShmem. //! kTACTIC_SHARED_MEMORY = 5, }; @@ -8430,10 +8462,15 @@ enum class HardwareCompatibilityLevel : int32_t //! built. kNONE = 0, - //! Require that the engine is compatible with Ampere and newer GPUs. This will limit the max shared memory usage to - //! 48KiB, may reduce the number of available tactics for each layer, and may prevent some fusions from occurring. - //! Thus this can decrease the performance, especially for tf32 models. + //! Require that the engine is compatible with Ampere and newer GPUs. This will limit the combined usage of driver + //! reserved and backend kernel max shared memory to 48KiB, may reduce the number of available tactics for each + //! layer, and may prevent some fusions from occurring. Thus this can decrease the performance, especially for tf32 + //! models. //! This option will disable cuDNN, cuBLAS, and cuBLAS LT as tactic sources. + //! + //! The driver reserved shared memory can be queried from cuDeviceGetAttribute(&reservedShmem, + //! CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK). + //! 
kAMPERE_PLUS = 1, }; diff --git a/include/NvInferConsistency.h b/include/NvInferConsistency.h index 5096c3f4..32bca28b 100644 --- a/include/NvInferConsistency.h +++ b/include/NvInferConsistency.h @@ -19,7 +19,9 @@ #define NV_INFER_CONSISTENCY_H #include "NvInferConsistencyImpl.h" +#define NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE 1 #include "NvInferRuntimeBase.h" +#undef NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE #include "NvInferRuntimePlugin.h" //! diff --git a/include/NvInferLegacyDims.h b/include/NvInferLegacyDims.h index 204d17a8..2725d184 100644 --- a/include/NvInferLegacyDims.h +++ b/include/NvInferLegacyDims.h @@ -18,7 +18,9 @@ #ifndef NV_INFER_LEGACY_DIMS_H #define NV_INFER_LEGACY_DIMS_H -#include "NvInferRuntimeCommon.h" +#define NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE 1 +#include "NvInferRuntimeBase.h" +#undef NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE //! //! \file NvInferLegacyDims.h diff --git a/include/NvInferRuntimeBase.h b/include/NvInferRuntimeBase.h index 60006e6c..3624706c 100644 --- a/include/NvInferRuntimeBase.h +++ b/include/NvInferRuntimeBase.h @@ -64,9 +64,15 @@ //! //! This file contains common definitions, data structures and interfaces shared between the standard and safe runtime. //! -//! \warning Do not directly include this file. Instead include either NvInferRuntime.h (for the standard runtime) or -//! NvInferSafeRuntime.h (for the safety runtime). -//! +//! \warning Do not directly include this file. Instead include one of: +//! * NvInferRuntime.h (for the standard runtime) +//! * NvInferSafeRuntime.h (for the safety runtime) +//! * NvInferConsistency.h (for consistency checker) +//! * NvInferPluginUtils.h (for plugin utilities) +//! +#if !defined(NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE) && !defined(TRT_VCAST_SAFE) +static_assert(false, "Do not directly include this file. Include NvInferRuntime.h or NvInferSafeRuntime.h or NvInferConsistency.h or NvInferPluginUtils.h"); +#endif //! Forward declare some CUDA types to avoid an include dependency. 
@@ -864,6 +870,8 @@ class IErrorRecorder : public IVersionedInterface //! //! \brief The length limit for an error description in bytes, excluding the '\0' string terminator. + //! Only applicable to safe runtime. + //! General error recorder implementation can use any size appropriate for the use case. //! static constexpr size_t kMAX_DESC_LENGTH{127U}; @@ -982,10 +990,10 @@ class IErrorRecorder : public IVersionedInterface //! //! \brief Report an error to the error recorder with the corresponding enum and description. //! - //! \param val The error code enum that is being reported. - //! \param desc The string description of the error, which will be a NULL-terminated string of kMAX_DESC_LENGTH - //! bytes or less (excluding the NULL terminator). Descriptions that exceed this limit will be silently - //! truncated. + //! \param val The error code enum that is being reported. + //! \param desc The string description of the error, which will be a NULL-terminated string. + //! For safety use cases its length is limited to kMAX_DESC_LENGTH bytes + //! (excluding the NULL terminator) and descriptions that exceed this limit will be silently truncated. //! //! Report an error to the user that has a given value and human readable description. The function returns false //! if processing can continue, which implies that the reported error is not fatal. This does not guarantee that diff --git a/include/NvInferRuntimeCommon.h b/include/NvInferRuntimeCommon.h index 65a3c220..13e42f4f 100644 --- a/include/NvInferRuntimeCommon.h +++ b/include/NvInferRuntimeCommon.h @@ -28,7 +28,9 @@ //! //! \warning Do not directly include this file. Instead include NvInferRuntime.h //! 
+#define NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE 1 #include "NvInferRuntimeBase.h" +#undef NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE #include "NvInferRuntimePlugin.h" namespace nvinfer1 diff --git a/include/NvInferRuntimePlugin.h b/include/NvInferRuntimePlugin.h index ecae2ce9..5f97f4a5 100644 --- a/include/NvInferRuntimePlugin.h +++ b/include/NvInferRuntimePlugin.h @@ -18,7 +18,9 @@ #ifndef NV_INFER_RUNTIME_PLUGIN_H #define NV_INFER_RUNTIME_PLUGIN_H +#define NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE 1 #include "NvInferRuntimeBase.h" +#undef NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE //! //! \file NvInferRuntimePlugin.h diff --git a/include/NvInferSafeRuntime.h b/include/NvInferSafeRuntime.h index 1c322c4e..6dc503e0 100644 --- a/include/NvInferSafeRuntime.h +++ b/include/NvInferSafeRuntime.h @@ -18,7 +18,9 @@ #ifndef NV_INFER_SAFE_RUNTIME_H #define NV_INFER_SAFE_RUNTIME_H +#define NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE 1 #include "NvInferRuntimeBase.h" +#undef NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE #include "NvInferRuntimePlugin.h" #include #include diff --git a/include/NvInferVersion.h b/include/NvInferVersion.h index 8c99bea7..13861a12 100644 --- a/include/NvInferVersion.h +++ b/include/NvInferVersion.h @@ -25,7 +25,7 @@ #define NV_TENSORRT_MAJOR 10 //!< TensorRT major version. #define NV_TENSORRT_MINOR 0 //!< TensorRT minor version. -#define NV_TENSORRT_PATCH 0 //!< TensorRT patch version. +#define NV_TENSORRT_PATCH 1 //!< TensorRT patch version. #define NV_TENSORRT_BUILD 6 //!< TensorRT build number. #define NV_TENSORRT_LWS_MAJOR 0 //!< TensorRT LWS major version. 
@@ -36,6 +36,6 @@ #define NV_TENSORRT_RELEASE_TYPE_RELEASE_CANDIDATE 1 //!< A release candidate #define NV_TENSORRT_RELEASE_TYPE_GENERAL_AVAILABILITY 2 //!< A final release -#define NV_TENSORRT_RELEASE_TYPE NV_TENSORRT_RELEASE_TYPE_EARLY_ACCESS //!< TensorRT release type +#define NV_TENSORRT_RELEASE_TYPE NV_TENSORRT_RELEASE_TYPE_GENERAL_AVAILABILITY //!< TensorRT release type #endif // NV_INFER_VERSION_H diff --git a/parsers/CMakeLists.txt b/parsers/CMakeLists.txt index 750942e6..6b4858ba 100644 --- a/parsers/CMakeLists.txt +++ b/parsers/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/parsers/common/half.h b/parsers/common/half.h index 7497459a..a66c197c 100644 --- a/parsers/common/half.h +++ b/parsers/common/half.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/parsers/common/ieee_half.h b/parsers/common/ieee_half.h index 071aee09..ac78fd6b 100644 --- a/parsers/common/ieee_half.h +++ b/parsers/common/ieee_half.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/parsers/common/parserUtils.h b/parsers/common/parserUtils.h index 115a2efa..eeb14724 100644 --- a/parsers/common/parserUtils.h +++ b/parsers/common/parserUtils.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/parsers/onnx b/parsers/onnx index 973d68d0..eb43908b 160000 --- a/parsers/onnx +++ b/parsers/onnx @@ -1 +1 @@ -Subproject commit 973d68d06f671998ddcc0c504b9a2fdfcfc85a62 +Subproject commit eb43908b02a296ea0594432f06e9d3fac288d672 diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index 2e708d3a..2007b7ed 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,10 +16,10 @@ # add_custom_target(plugin) -set(TARGET_NAME nvinfer_plugin) +set(TARGET_NAME ${nvinfer_plugin_lib_name}) set(SHARED_TARGET ${TARGET_NAME}) set(STATIC_TARGET ${TARGET_NAME}_static) -set(VFC_TARGET_NAME nvinfer_vc_plugin) +set(VFC_TARGET_NAME ${nvinfer_vc_plugin_lib_name}) set(VFC_SHARED_TARGET ${VFC_TARGET_NAME}) set(TARGET_DIR ${CMAKE_CURRENT_SOURCE_DIR}) @@ -143,10 +143,6 @@ else() set_target_properties(${SHARED_TARGET} PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL -Wl,-Bsymbolic -Wl,--version-script=${PLUGIN_EXPORT_MAP} -Wl,--no-undefined") endif() -if (ADDITIONAL_PLATFORM_LIB_FLAGS) - set_target_properties(${SHARED_TARGET} PROPERTIES LINK_FLAGS ${ADDITIONAL_PLATFORM_LIB_FLAGS}) -endif() - set_target_properties(${SHARED_TARGET} PROPERTIES DEBUG_POSTFIX ${TRT_DEBUG_POSTFIX}) set_target_properties(${SHARED_TARGET} PROPERTIES VERSION ${TRT_VERSION} SOVERSION ${TRT_SOVERSION} ) @@ -155,7 +151,7 @@ set_property(TARGET ${SHARED_TARGET} PROPERTY CUDA_STANDARD 14) target_link_libraries(${SHARED_TARGET} ${CUDART_LIB} - ${nvinfer_LIB_PATH} + ${${nvinfer_lib_name}_LIB_PATH} ${CMAKE_DL_LIBS} ) @@ -189,10 +185,6 @@ set_target_properties(${STATIC_TARGET} PROPERTIES set_target_properties(${STATIC_TARGET} PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL") -if (ADDITIONAL_PLATFORM_LIB_FLAGS) - set_target_properties(${STATIC_TARGET} PROPERTIES LINK_FLAGS ${ADDITIONAL_PLATFORM_LIB_FLAGS}) -endif() - set_target_properties(${STATIC_TARGET} PROPERTIES DEBUG_POSTFIX ${TRT_DEBUG_POSTFIX}) set_target_properties(${STATIC_TARGET} PROPERTIES VERSION ${TRT_VERSION} SOVERSION ${TRT_SOVERSION} ) @@ -230,10 +222,6 @@ else() set_target_properties(${VFC_SHARED_TARGET} PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL -Wl,-Bsymbolic -Wl,--version-script=${VFC_PLUGIN_EXPORT_MAP} -Wl,--no-undefined") endif() -if (ADDITIONAL_PLATFORM_LIB_FLAGS) - 
set_target_properties(${VFC_SHARED_TARGET} PROPERTIES LINK_FLAGS ${ADDITIONAL_PLATFORM_LIB_FLAGS}) -endif() - set_target_properties(${VFC_SHARED_TARGET} PROPERTIES DEBUG_POSTFIX ${TRT_DEBUG_POSTFIX}) set_target_properties(${VFC_SHARED_TARGET} PROPERTIES VERSION ${TRT_VERSION} SOVERSION ${TRT_SOVERSION} ) @@ -242,7 +230,7 @@ set_property(TARGET ${VFC_SHARED_TARGET} PROPERTY CUDA_STANDARD 14) target_link_libraries(${VFC_SHARED_TARGET} ${CUDART_LIB} - ${nvinfer_LIB_PATH} + ${${nvinfer_lib_name}_LIB_PATH} ${CMAKE_DL_LIBS} ) diff --git a/plugin/batchTilePlugin/CMakeLists.txt b/plugin/batchTilePlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/batchTilePlugin/CMakeLists.txt +++ b/plugin/batchTilePlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/batchTilePlugin/batchTilePlugin.cpp b/plugin/batchTilePlugin/batchTilePlugin.cpp index 7b99d578..1e98ac6e 100644 --- a/plugin/batchTilePlugin/batchTilePlugin.cpp +++ b/plugin/batchTilePlugin/batchTilePlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/batchTilePlugin/batchTilePlugin.h b/plugin/batchTilePlugin/batchTilePlugin.h index 0ff85bb0..fe1ce902 100644 --- a/plugin/batchTilePlugin/batchTilePlugin.h +++ b/plugin/batchTilePlugin/batchTilePlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/batchedNMSPlugin/CMakeLists.txt b/plugin/batchedNMSPlugin/CMakeLists.txt index 1f1d4169..f1f6081b 100644 --- a/plugin/batchedNMSPlugin/CMakeLists.txt +++ b/plugin/batchedNMSPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/batchedNMSPlugin/batchedNMSInference.cu b/plugin/batchedNMSPlugin/batchedNMSInference.cu index 9d01f5b8..2a0ceff3 100644 --- a/plugin/batchedNMSPlugin/batchedNMSInference.cu +++ b/plugin/batchedNMSPlugin/batchedNMSInference.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/batchedNMSPlugin/batchedNMSPlugin.cpp b/plugin/batchedNMSPlugin/batchedNMSPlugin.cpp index 40ff8671..428db1ad 100644 --- a/plugin/batchedNMSPlugin/batchedNMSPlugin.cpp +++ b/plugin/batchedNMSPlugin/batchedNMSPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/batchedNMSPlugin/batchedNMSPlugin.h b/plugin/batchedNMSPlugin/batchedNMSPlugin.h index 418333e8..4c6c749f 100644 --- a/plugin/batchedNMSPlugin/batchedNMSPlugin.h +++ b/plugin/batchedNMSPlugin/batchedNMSPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/batchedNMSPlugin/gatherNMSOutputs.h b/plugin/batchedNMSPlugin/gatherNMSOutputs.h index f245eb93..0e9b78e4 100644 --- a/plugin/batchedNMSPlugin/gatherNMSOutputs.h +++ b/plugin/batchedNMSPlugin/gatherNMSOutputs.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/CMakeLists.txt b/plugin/bertQKVToContextPlugin/CMakeLists.txt index 6bdff6d7..da805cd2 100644 --- a/plugin/bertQKVToContextPlugin/CMakeLists.txt +++ b/plugin/bertQKVToContextPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/CMakeLists.txt b/plugin/bertQKVToContextPlugin/fused_multihead_attention/CMakeLists.txt index 1d53970e..91e05d03 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/CMakeLists.txt +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/include/fused_multihead_attention.h b/plugin/bertQKVToContextPlugin/fused_multihead_attention/include/fused_multihead_attention.h index d59e8a73..e1b51b9d 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/include/fused_multihead_attention.h +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/include/fused_multihead_attention.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -32,6 +32,236 @@ namespace nvinfer1 { + +namespace pluginInternal +{ +template +class TFusedMultiHeadAttentionXMMAKernel +{ +public: + using KernelMeta = TKernelMeta; + using KernelParam = TKernelParam; + inline uint64_t hashID(uint32_t s, uint32_t d) const + { + return (uint64_t) s << 32 | d; + } + virtual uint64_t hashID(const KernelMeta& kernelMeta) const + { + return hashID(kernelMeta.mS, kernelMeta.mD); + } + + TFusedMultiHeadAttentionXMMAKernel( + const TKernelMeta* pMetaStart, uint32_t nMetaCount, plugin::bert::Data_type type, uint32_t sm) + : mDataType(type) + , mKernelMeta(pMetaStart) + , mKernelMetaCount(nMetaCount) + , mSM(sm) + { + PLUGIN_ASSERT(mKernelMetaCount && "No kernels were loaded correctly."); + } + + void loadXMMAKernels(uint32_t smVersion) + { + for (uint32_t i = 0; i < mKernelMetaCount; ++i) + { + const auto& kernelMeta = mKernelMeta[i]; + const auto kernelKey = hashID(kernelMeta); + if (kernelMeta.mSM == smVersion && kernelMeta.mDataType == mDataType + && mFunctions.find(kernelKey) == mFunctions.end()) + { + const uint32_t DEFAULT_SMEM_SIZE{48 * 1024}; + if (kernelMeta.mSharedMemBytes >= DEFAULT_SMEM_SIZE) + { + int32_t deviceID{0}; + cudaGetDevice(&deviceID); + int32_t sharedMemPerMultiprocessor{0}; + if (cudaDeviceGetAttribute( + &sharedMemPerMultiprocessor, cudaDevAttrMaxSharedMemoryPerBlockOptin, deviceID) + != cudaSuccess + || sharedMemPerMultiprocessor < static_cast(kernelMeta.mSharedMemBytes)) + { + // skip load function because not enough shared memory to launch the kernel + continue; + } + } + + CUmodule hmod{0}; + auto findModuleIter = mModules.find(kernelMeta.mCubin); + if (findModuleIter != mModules.end()) + { + hmod = findModuleIter->second; + } + else + { + cuErrCheck(mDriver.cuModuleLoadData(&hmod, kernelMeta.mCubin), mDriver); + mModules.insert(std::make_pair(kernelMeta.mCubin, hmod)); + } + + 
FusedMultiHeadAttentionKernelInfo funcInfo; + funcInfo.mMetaInfoIndex = i; + cuErrCheck(mDriver.cuModuleGetFunction(&funcInfo.mDeviceFunction, hmod, kernelMeta.mFuncName), mDriver); + if (kernelMeta.mSharedMemBytes >= DEFAULT_SMEM_SIZE) + { + if (mDriver.cuFuncSetAttribute(funcInfo.mDeviceFunction, + CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, kernelMeta.mSharedMemBytes) + != CUDA_SUCCESS) + { + // some chip may not have enough shared memory to launch the kernel + continue; + } + } + mFunctions.insert({kernelKey, funcInfo}); + uint64_t const s = kernelMeta.mS; + uint64_t const headSize = kernelMeta.mD; + uint64_t key = (headSize << 32 | s); + if (mValidSequences.find(key) == mValidSequences.end()) + { + mValidSequences.insert(key); + } + } + } + } + + void loadXMMAKernels() + { + if (!mFunctions.empty()) + { + return; + } + + loadXMMAKernels(mSM); + + // sm_86 chips prefer sm_86 sass, but can also use sm_80 sass if sm_86 not exist. + // sm_87 cannot run sm_80 sass + if (mSM == kSM_86) + { + loadXMMAKernels(kSM_80); + } + + // sm_89 will reuse sm_80 and sm_86 kernels + if (mSM == kSM_89) + { + loadXMMAKernels(kSM_86); + loadXMMAKernels(kSM_80); + } + } + + bool isValid(int32_t headSize, int32_t s) const + { + uint64_t key = (static_cast(headSize) << 32 | static_cast(s)); + return (mValidSequences.find(key) != mValidSequences.end()); + } + + virtual void run(TKernelParam& params, cudaStream_t ss) const + { + const auto findIter = mFunctions.find(hashID(params.s, params.d)); + std::stringstream errMsg; + errMsg << "Could not find kernel for:\n" + << "\t s: " << params.s << "\n" + << "\t d: " << params.d << "\n" + << "Was the plugin compiled on a compatible CUDA and SM version?\n" + << "\t Compiled on CUDA " << CUDA_VERSION << "\n" + << "\t Current SM version: " << mSM << "\n" + << "\t SM versions enabled during compilation: " +#if defined(ENABLE_SM72) + << "72 " +#endif +#if defined(ENABLE_SM75) + << "75 " +#endif +#if defined(ENABLE_SM80) + << "80 " +#endif +#if 
defined(ENABLE_SM86) + << "86 " +#endif +#if defined(ENABLE_SM87) + << "87 " +#endif +#if defined(ENABLE_SM89) + << "89 " +#endif +#if defined(ENABLE_SM90) + << "90 " +#endif + << "\n"; + PLUGIN_VALIDATE(findIter != mFunctions.end(), errMsg.str().c_str()); + + const auto& kernelMeta = mKernelMeta[findIter->second.mMetaInfoIndex]; + const CUfunction func = findIter->second.mDeviceFunction; + + void* kernelParams[] = {&params, nullptr}; + cuErrCheck(mDriver.cuLaunchKernel(func, params.h, params.b, 1, kernelMeta.mThreadsPerCTA, 1, 1, + kernelMeta.mSharedMemBytes, ss, kernelParams, nullptr), + mDriver); + } + + virtual ~TFusedMultiHeadAttentionXMMAKernel() = default; + +protected: + nvinfer1::CUDADriverWrapper mDriver; + + plugin::bert::Data_type mDataType; + const TKernelMeta* mKernelMeta; + uint32_t mKernelMetaCount; + uint32_t mSM; + std::unordered_map mModules; + struct FusedMultiHeadAttentionKernelInfo + { + uint32_t mMetaInfoIndex; + CUfunction mDeviceFunction; + }; + std::unordered_map mFunctions; + // Set of valid sequence and head size combination. We use (headSize << 32 | sequence) as key here.
+ std::unordered_set mValidSequences; +}; +template +class TFusedMHAKernelFactory +{ +public: + const TFusedMHAKernelList* getXMMAKernels(const typename TFusedMHAKernelList::KernelMeta* pKernelList, + uint32_t nbKernels, plugin::bert::Data_type type, uint32_t sm) + { + static std::mutex s_mutex; + std::lock_guard lg(s_mutex); + + const auto id = hashID(type, sm); + const auto findIter = mKernels.find(id); + if (findIter == mKernels.end()) + { + TFusedMHAKernelList* newKernel = new TFusedMHAKernelList{pKernelList, nbKernels, type, sm}; + newKernel->loadXMMAKernels(); + mKernels.insert(std::make_pair(id, std::unique_ptr(newKernel))); + return newKernel; + } + return findIter->second.get(); + } + + static TFusedMHAKernelFactory& Get() + { + static TFusedMHAKernelFactory s_factory; + return s_factory; + } + +private: + TFusedMHAKernelFactory() = default; + + inline uint64_t hashID(plugin::bert::Data_type type, uint32_t sm) const + { + // use deviceID in hasID for multi GPU support before driver support context-less loading of cubin + int32_t deviceID{0}; + CSC(cudaGetDevice(&deviceID), STATUS_FAILURE); + + PLUGIN_ASSERT((deviceID & 0xFFFF) == deviceID); + PLUGIN_ASSERT((type & 0xFFFF) == type); + PLUGIN_ASSERT((sm & 0xFFFFFFFF) == sm); + return (uint64_t) type << 48 | (uint64_t) deviceID << 32 | sm; + } + + std::unordered_map> mKernels; +}; +} // namespace pluginInternal + namespace plugin { namespace bert @@ -324,235 +554,10 @@ static const struct FusedMultiHeadAttentionKernelMetaInfoV1 #endif // defined(ENABLE_SM90) }; -template -class TFusedMultiHeadAttentionXMMAKernel -{ -public: - using KernelMeta = TKernelMeta; - using KernelParam = TKernelParam; - inline uint64_t hashID(uint32_t s, uint32_t d) const - { - return (uint64_t) s << 32 | d; - } - virtual uint64_t hashID(const KernelMeta& kernelMeta) const - { - return hashID(kernelMeta.mS, kernelMeta.mD); - } - - TFusedMultiHeadAttentionXMMAKernel(const TKernelMeta* pMetaStart, uint32_t nMetaCount, Data_type type, 
uint32_t sm) - : mDataType(type) - , mKernelMeta(pMetaStart) - , mKernelMetaCount(nMetaCount) - , mSM(sm) - { - PLUGIN_ASSERT(mKernelMetaCount && "No kernels were loaded correctly."); - } - - void loadXMMAKernels(uint32_t smVersion) - { - for (uint32_t i = 0; i < mKernelMetaCount; ++i) - { - const auto& kernelMeta = mKernelMeta[i]; - const auto kernelKey = hashID(kernelMeta); - if (kernelMeta.mSM == smVersion && kernelMeta.mDataType == mDataType - && mFunctions.find(kernelKey) == mFunctions.end()) - { - const uint32_t DEFAULT_SMEM_SIZE{48 * 1024}; - if (kernelMeta.mSharedMemBytes >= DEFAULT_SMEM_SIZE) - { - int32_t deviceID{0}; - cudaGetDevice(&deviceID); - int32_t sharedMemPerMultiprocessor{0}; - if (cudaDeviceGetAttribute( - &sharedMemPerMultiprocessor, cudaDevAttrMaxSharedMemoryPerBlockOptin, deviceID) - != cudaSuccess - || sharedMemPerMultiprocessor < static_cast(kernelMeta.mSharedMemBytes)) - { - // skip load function because not enough shared memory to launch the kernel - continue; - } - } - - CUmodule hmod{0}; - auto findModuleIter = mModules.find(kernelMeta.mCubin); - if (findModuleIter != mModules.end()) - { - hmod = findModuleIter->second; - } - else - { - cuErrCheck(mDriver.cuModuleLoadData(&hmod, kernelMeta.mCubin), mDriver); - mModules.insert(std::make_pair(kernelMeta.mCubin, hmod)); - } - - FusedMultiHeadAttentionKernelInfo funcInfo; - funcInfo.mMetaInfoIndex = i; - cuErrCheck(mDriver.cuModuleGetFunction(&funcInfo.mDeviceFunction, hmod, kernelMeta.mFuncName), mDriver); - if (kernelMeta.mSharedMemBytes >= DEFAULT_SMEM_SIZE) - { - if (mDriver.cuFuncSetAttribute(funcInfo.mDeviceFunction, - CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, kernelMeta.mSharedMemBytes) - != CUDA_SUCCESS) - { - // some chip may not have enough shared memory to launch the kernel - continue; - } - } - mFunctions.insert({kernelKey, funcInfo}); - uint64_t const s = kernelMeta.mS; - uint64_t const headSize = kernelMeta.mD; - uint64_t key = (headSize << 32 | s); - if 
(mValidSequences.find(key) == mValidSequences.end()) - { - mValidSequences.insert(key); - } - } - } - } - - void loadXMMAKernels() - { - if (!mFunctions.empty()) - { - return; - } - - loadXMMAKernels(mSM); - - // sm_86 chips prefer sm_86 sass, but can also use sm_80 sass if sm_86 not exist. - // sm_87 cannot run sm_80 sass - if (mSM == kSM_86) - { - loadXMMAKernels(kSM_80); - } - - // sm_89 will reuse sm_80 and sm_86 kernels - if (mSM == kSM_89) - { - loadXMMAKernels(kSM_86); - loadXMMAKernels(kSM_80); - } - } - - bool isValid(int32_t headSize, int32_t s) const - { - uint64_t key = (static_cast(headSize) << 32 | static_cast(s)); - return (mValidSequences.find(key) != mValidSequences.end()); - } - - virtual void run(TKernelParam& params, cudaStream_t ss) const - { - const auto findIter = mFunctions.find(hashID(params.s, params.d)); - std::stringstream errMsg; - errMsg << "Could not find kernel for:\n" - << "\t s: " << params.s << "\n" - << "\t d: " << params.d << "\n" - << "Was the plugin compiled on a compatible CUDA and SM version?\n" - << "\t Compiled on CUDA " << CUDA_VERSION << "\n" - << "\t Current SM version: " << mSM << "\n" - << "\t SM versions enabled during compilation: " -#if defined(ENABLE_SM72) - << "72 " -#endif -#if defined(ENABLE_SM75) - << "75 " -#endif -#if defined(ENABLE_SM80) - << "80 " -#endif -#if defined(ENABLE_SM86) - << "86 " -#endif -#if defined(ENABLE_SM87) - << "87 " -#endif -#if defined(ENABLE_SM89) - << "89 " -#endif -#if defined(ENABLE_SM90) - << "90 " -#endif - << "\n"; - PLUGIN_VALIDATE(findIter != mFunctions.end(), errMsg.str().c_str()); - - const auto& kernelMeta = mKernelMeta[findIter->second.mMetaInfoIndex]; - const CUfunction func = findIter->second.mDeviceFunction; - - void* kernelParams[] = {¶ms, nullptr}; - cuErrCheck(mDriver.cuLaunchKernel(func, params.h, params.b, 1, kernelMeta.mThreadsPerCTA, 1, 1, - kernelMeta.mSharedMemBytes, ss, kernelParams, nullptr), - mDriver); - } - - virtual ~TFusedMultiHeadAttentionXMMAKernel() = 
default; - -protected: - nvinfer1::CUDADriverWrapper mDriver; - - Data_type mDataType; - const TKernelMeta* mKernelMeta; - uint32_t mKernelMetaCount; - uint32_t mSM; - std::unordered_map mModules; - struct FusedMultiHeadAttentionKernelInfo - { - uint32_t mMetaInfoIndex; - CUfunction mDeviceFunction; - }; - std::unordered_map mFunctions; - // Set of valid sequence and head size combination. We use (headSize << 32 | sequence) as key here. - std::unordered_set mValidSequences; -}; - -template -class TFusedMHAKernelFactory -{ -public: - const TFusedMHAKernelList* getXMMAKernels( - const typename TFusedMHAKernelList::KernelMeta* pKernelList, uint32_t nbKernels, Data_type type, uint32_t sm) - { - static std::mutex s_mutex; - std::lock_guard lg(s_mutex); - - const auto id = hashID(type, sm); - const auto findIter = mKernels.find(id); - if (findIter == mKernels.end()) - { - TFusedMHAKernelList* newKernel = new TFusedMHAKernelList{pKernelList, nbKernels, type, sm}; - newKernel->loadXMMAKernels(); - mKernels.insert(std::make_pair(id, std::unique_ptr(newKernel))); - return newKernel; - } - return findIter->second.get(); - } - - static TFusedMHAKernelFactory& Get() - { - static TFusedMHAKernelFactory s_factory; - return s_factory; - } - -private: - TFusedMHAKernelFactory() = default; - - inline uint64_t hashID(Data_type type, uint32_t sm) const - { - // use deviceID in hasID for multi GPU support before driver support context-less loading of cubin - int32_t deviceID{0}; - CSC(cudaGetDevice(&deviceID), STATUS_FAILURE); - - PLUGIN_ASSERT((deviceID & 0xFFFF) == deviceID); - PLUGIN_ASSERT((type & 0xFFFF) == type); - PLUGIN_ASSERT((sm & 0xFFFFFFFF) == sm); - return (uint64_t) type << 48 | (uint64_t) deviceID << 32 | sm; - } - - std::unordered_map> mKernels; -}; - using FusedMultiHeadAttentionXMMAKernel - = TFusedMultiHeadAttentionXMMAKernel; -using FusedMHAKernelFactory = TFusedMHAKernelFactory; + = pluginInternal::TFusedMultiHeadAttentionXMMAKernel; +using FusedMHAKernelFactory = 
pluginInternal::TFusedMHAKernelFactory; inline const FusedMultiHeadAttentionXMMAKernel* getXMMAKernels(Data_type type, uint32_t sm) { diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/include/fused_multihead_attention_common.h b/plugin/bertQKVToContextPlugin/fused_multihead_attention/include/fused_multihead_attention_common.h index 11d4b954..e1fe7d40 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/include/fused_multihead_attention_common.h +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/include/fused_multihead_attention_common.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm75.cpp index af45426d..9ae4c46d 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm80.cpp index 3e5031b1..aef4ae47 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm87.cpp index 0d0a6ed7..6846143d 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm90.cpp index a5134aaf..41bd15fa 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_128_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm75.cpp index e2604633..59cadd97 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm80.cpp index 035270eb..ab54f6b9 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm86.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm86.cpp index 81f7a887..9189749c 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm86.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm86.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm87.cpp index 929c0a4b..92e6811e 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm90.cpp index a9592f3f..a2a10d10 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_384_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_512_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_512_64_kernel.sm90.cpp index a5a19772..690e6f42 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_512_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_512_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm75.cpp index 9dc6ffa6..6d8c23da 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm80.cpp index 588d5dc8..34eba769 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm87.cpp index 4d6308d3..9268ddc3 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm90.cpp index fd292683..43b2bd85 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_64_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm75.cpp index 238e9fbd..f345e66c 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm80.cpp index a2eb24f7..c61eb87a 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm87.cpp index 5b39da95..29d128ef 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm90.cpp index 1af3e96a..18e389ca 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_fp16_96_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm75.cpp index a18e4874..26ca9b77 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm80.cpp index 0c079b17..ffb0d50d 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm87.cpp index b88a696d..26b7460f 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm90.cpp index 457af2b6..eb18694d 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_128_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm75.cpp index 22611907..941996d1 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm80.cpp index bf716793..5fe88e45 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm87.cpp index c4376f86..0d23c4a1 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm90.cpp index 44f159a7..576b0e17 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_384_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_512_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_512_64_kernel.sm90.cpp index fd51119e..6cef65c5 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_512_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_512_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_64_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_64_64_kernel.sm80.cpp index 062ce999..6211cf87 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_64_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_64_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_96_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_96_64_kernel.sm80.cpp index 017f6862..b94a6a7b 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_96_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention/src/fused_multihead_attention_int8_96_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/CMakeLists.txt b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/CMakeLists.txt index 1d53970e..91e05d03 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/CMakeLists.txt +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/include/fused_multihead_attention_v2.h b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/include/fused_multihead_attention_v2.h index bb729359..ecc3684d 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/include/fused_multihead_attention_v2.h +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/include/fused_multihead_attention_v2.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -832,14 +832,14 @@ static const struct FusedMultiHeadAttentionKernelMetaInfoV2 }; class FusedMultiHeadAttentionXMMAKernelV2 - : public TFusedMultiHeadAttentionXMMAKernel { public: FusedMultiHeadAttentionXMMAKernelV2( const FusedMultiHeadAttentionKernelMetaInfoV2* pMetaStart, uint32_t nMetaCount, Data_type type, uint32_t sm) - : TFusedMultiHeadAttentionXMMAKernel(pMetaStart, nMetaCount, type, sm) + : pluginInternal::TFusedMultiHeadAttentionXMMAKernel(pMetaStart, nMetaCount, type, sm) { } @@ -988,7 +988,7 @@ class FusedMultiHeadAttentionXMMAKernelV2 } }; -using FusedMHAKernelFactoryV2 = TFusedMHAKernelFactory; +using FusedMHAKernelFactoryV2 = pluginInternal::TFusedMHAKernelFactory; inline const FusedMultiHeadAttentionXMMAKernelV2* getXMMAKernelsV2(Data_type type, uint32_t sm) { diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_32_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_32_kernel.sm75.cpp index 373f496a..d82cc0cb 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_32_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_32_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_32_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_32_kernel.sm80.cpp index 1e3ff7c6..3f992060 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_32_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_32_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm75.cpp index ece2d0eb..c146aa40 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm80.cpp index dbc34090..6ae22e4a 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm86.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm86.cpp index ff794f09..f8a98908 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm86.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm86.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm87.cpp index d957a175..6f3b27e3 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm90.cpp index 910c2772..d56ece44 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_128_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_32_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_32_kernel.sm75.cpp index f466437c..05ffdb23 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_32_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_32_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_32_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_32_kernel.sm80.cpp index 643f3abe..0d6a6c53 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_32_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_32_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm75.cpp index b193aac5..d549443c 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm80.cpp index eedf762f..e8b7ec1d 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm86.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm86.cpp index 17cdf962..1d7791b3 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm86.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm86.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm87.cpp index 3943f07e..a03a9a39 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm90.cpp index 8aebf6e4..6e04b4e7 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_256_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm75.cpp index 47d6f8b4..b9f264c2 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm80.cpp index 2c0141c6..1b5e752a 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm86.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm86.cpp index 007b0ca5..320a9e88 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm86.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm86.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm87.cpp index e47a0eb5..e264c016 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm90.cpp index 71047e0d..f9ce8e34 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_384_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_32_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_32_kernel.sm75.cpp index e424fd93..8f766536 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_32_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_32_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_32_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_32_kernel.sm80.cpp index f3b2aec9..b22936ed 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_32_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_32_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_64_kernel.sm75.cpp index 6706f1e1..624e6e0b 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_64_kernel.sm80.cpp index 57d31338..c8a9c2f9 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_64_kernel.sm90.cpp index d9bbd955..a03160a2 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_512_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm75.cpp index a93f1f80..4642ab1a 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm80.cpp index fc6e825e..fae19400 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm86.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm86.cpp index dc64aaf1..c5dc1be8 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm86.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm86.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm87.cpp index 17394f7b..b93318b7 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm90.cpp index 30a6a139..192047d0 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_64_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm75.cpp index 75826861..a4dd7851 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm80.cpp index a5a9db91..9f8557f6 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm86.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm86.cpp index 5c0e4792..e45804f5 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm86.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm86.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm87.cpp index 75cca5b0..f9fd241e 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm90.cpp index 05ed3a7d..93e21e59 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_fp16_96_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_128_32_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_128_32_kernel.sm80.cpp index 7377bb87..cf253602 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_128_32_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_128_32_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_128_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_128_64_kernel.sm87.cpp index c486ba74..a446bdc9 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_128_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_128_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_128_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_128_64_kernel.sm90.cpp index ff8b71b7..52dd640b 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_128_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_128_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_192_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_192_64_kernel.sm87.cpp index b55a9b29..5f51d0f9 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_192_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_192_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_192_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_192_64_kernel.sm90.cpp index a486db0f..2b0aec86 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_192_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_192_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_256_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_256_64_kernel.sm87.cpp index dcac39f3..3cd2a96d 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_256_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_256_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_256_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_256_64_kernel.sm90.cpp index 9826a2c2..5a3744f1 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_256_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_256_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_384_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_384_64_kernel.sm87.cpp index b6659f16..10b61245 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_384_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_384_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_384_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_384_64_kernel.sm90.cpp index bbb5eeeb..b52902cb 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_384_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_384_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_64_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_64_64_kernel.sm80.cpp index f9fd6183..84db4d9b 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_64_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_64_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_64_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_64_64_kernel.sm87.cpp index 6441c74a..abdc3f80 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_64_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_64_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_64_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_64_64_kernel.sm90.cpp index df8cda25..dc88c038 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_64_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_64_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_96_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_96_64_kernel.sm80.cpp index e62d93aa..014442ea 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_96_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_96_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_96_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_96_64_kernel.sm87.cpp index 590c0df4..6d830826 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_96_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_96_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_96_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_96_64_kernel.sm90.cpp index be698b64..b345aad7 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_96_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_il_int8_96_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_32_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_32_kernel.sm75.cpp index ce3baa27..310bd7b3 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_32_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_32_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_32_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_32_kernel.sm80.cpp index 0abcf4e3..754f1117 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_32_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_32_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm72.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm72.cpp index fbf16481..e8d90371 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm72.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm72.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm75.cpp index 56cb1930..208f99b1 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm80.cpp index f7b86091..28063c3a 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm86.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm86.cpp index fe49aaa3..9073a280 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm86.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm86.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm87.cpp index b84b0dc8..a7c7067b 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm90.cpp index 6f889451..2db0cf89 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_128_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm72.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm72.cpp index 3c3735d1..81f815ab 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm72.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm72.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm75.cpp index dfe6d8ce..c2725c28 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm80.cpp index 8a1d2d2c..9e310f3e 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm86.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm86.cpp index 31dd3150..d7c891c6 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm86.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm86.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm87.cpp index aa2a81c9..a1a73aed 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm90.cpp index a5e4c65e..e2ba6e02 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_192_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_32_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_32_kernel.sm75.cpp index 2a729502..6b74c2fe 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_32_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_32_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_32_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_32_kernel.sm80.cpp index aeac0ebd..ecb4e343 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_32_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_32_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm72.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm72.cpp index a62c2cf9..248e3096 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm72.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm72.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm75.cpp index 3fa33ae5..c9a585ee 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm80.cpp index f597a37e..fe195f5e 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm86.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm86.cpp index 24d31716..6afbe0c8 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm86.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm86.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm87.cpp index b70f696d..f8e37cfb 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm90.cpp index 07f7b870..d170e2f1 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_256_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm72.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm72.cpp index 2d62254b..cb17f7ab 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm72.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm72.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm75.cpp index b373a064..9fbd6434 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm80.cpp index 86517581..d8c78ccd 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm86.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm86.cpp index c9196880..aeac0b9e 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm86.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm86.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm87.cpp index 70e699f8..044654af 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm90.cpp index 848c68be..6028ed75 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_384_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_32_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_32_kernel.sm75.cpp index baaf7441..36ece8b7 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_32_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_32_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_32_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_32_kernel.sm80.cpp index 68204bf6..590cbecb 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_32_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_32_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_64_kernel.sm75.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_64_kernel.sm75.cpp index 8ee4ced0..15312cff 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_64_kernel.sm75.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_64_kernel.sm75.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_64_kernel.sm80.cpp index e9bd8613..0cd60732 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_64_kernel.sm90.cpp index 48644b36..58e28091 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_512_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_64_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_64_64_kernel.sm80.cpp index 77ccb240..23019c5a 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_64_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_64_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_64_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_64_64_kernel.sm87.cpp index 2eb5c132..35635613 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_64_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_64_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_64_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_64_64_kernel.sm90.cpp index 2280de3b..4161dcd5 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_64_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_64_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_96_64_kernel.sm80.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_96_64_kernel.sm80.cpp index b7a7f1db..f9056c6d 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_96_64_kernel.sm80.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_96_64_kernel.sm80.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_96_64_kernel.sm87.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_96_64_kernel.sm87.cpp index c2e6aca4..e5689381 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_96_64_kernel.sm87.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_96_64_kernel.sm87.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_96_64_kernel.sm90.cpp b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_96_64_kernel.sm90.cpp index a4516a2d..427ab9f8 100644 --- a/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_96_64_kernel.sm90.cpp +++ b/plugin/bertQKVToContextPlugin/fused_multihead_attention_v2/src/fused_multihead_attention_v2_int8_96_64_kernel.sm90.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPlugin.cpp b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPlugin.cpp index f62f2c9a..40a42af0 100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPlugin.cpp +++ b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/zeroPadding2d.cu b/plugin/bertQKVToContextPlugin/zeroPadding2d.cu index aa8a70c9..f8135ada 100644 --- a/plugin/bertQKVToContextPlugin/zeroPadding2d.cu +++ b/plugin/bertQKVToContextPlugin/zeroPadding2d.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/bertQKVToContextPlugin/zeroPadding2d.h b/plugin/bertQKVToContextPlugin/zeroPadding2d.h index bc1409a2..faa85ebe 100644 --- a/plugin/bertQKVToContextPlugin/zeroPadding2d.h +++ b/plugin/bertQKVToContextPlugin/zeroPadding2d.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/clipPlugin/CMakeLists.txt b/plugin/clipPlugin/CMakeLists.txt index 1f1d4169..f1f6081b 100644 --- a/plugin/clipPlugin/CMakeLists.txt +++ b/plugin/clipPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/clipPlugin/clip.cu b/plugin/clipPlugin/clip.cu index f407ebbc..44bc1f73 100644 --- a/plugin/clipPlugin/clip.cu +++ b/plugin/clipPlugin/clip.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/clipPlugin/clip.h b/plugin/clipPlugin/clip.h index 70a53143..e21e8b43 100644 --- a/plugin/clipPlugin/clip.h +++ b/plugin/clipPlugin/clip.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/CMakeLists.txt b/plugin/common/CMakeLists.txt index 12ab940b..af59d7f7 100644 --- a/plugin/common/CMakeLists.txt +++ b/plugin/common/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/bboxUtils.h b/plugin/common/bboxUtils.h index 028eeb81..6419611d 100644 --- a/plugin/common/bboxUtils.h +++ b/plugin/common/bboxUtils.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/bertCommon.h b/plugin/common/bertCommon.h index e34e954f..4cb33551 100644 --- a/plugin/common/bertCommon.h +++ b/plugin/common/bertCommon.h @@ -86,6 +86,17 @@ constexpr size_t packedMaskSize384 = xmmasM384 * threadsPerCta384; namespace nvinfer1 { +namespace pluginInternal +{ +template +struct CudaDeleter +{ + void operator()(T* buf) + { + PLUGIN_CUASSERT(cudaFree(buf)); + } +}; +} // namespace pluginInternal namespace plugin { namespace bert @@ -308,16 +319,7 @@ struct CublasConfigHelper }; template -struct CudaDeleter -{ - void operator()(T* buf) - { - PLUGIN_CUASSERT(cudaFree(buf)); - } -}; - -template -using cuda_unique_ptr = std::unique_ptr>; +using cuda_unique_ptr = std::unique_ptr>; template using cuda_shared_ptr = std::shared_ptr; @@ -325,7 +327,7 @@ using cuda_shared_ptr = std::shared_ptr; template void make_cuda_shared(cuda_shared_ptr& ptr, void* cudaMem) { - ptr.reset(static_cast(cudaMem), bert::CudaDeleter()); + ptr.reset(static_cast(cudaMem), pluginInternal::CudaDeleter()); } struct WeightsWithOwnership : public nvinfer1::Weights diff --git a/plugin/common/cub_helper.h b/plugin/common/cub_helper.h index ee8402c4..7cc35848 100644 --- a/plugin/common/cub_helper.h +++ b/plugin/common/cub_helper.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/cudaDriverWrapper.cpp b/plugin/common/cudaDriverWrapper.cpp index 5e317564..fa83866c 100644 --- a/plugin/common/cudaDriverWrapper.cpp +++ b/plugin/common/cudaDriverWrapper.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/cudaDriverWrapper.h b/plugin/common/cudaDriverWrapper.h index b105e3c2..209ed3f8 100644 --- a/plugin/common/cudaDriverWrapper.h +++ b/plugin/common/cudaDriverWrapper.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/dimsHelpers.h b/plugin/common/dimsHelpers.h index 8198590b..239a63ac 100644 --- a/plugin/common/dimsHelpers.h +++ b/plugin/common/dimsHelpers.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/half.h b/plugin/common/half.h index 28825bb1..af49356a 100644 --- a/plugin/common/half.h +++ b/plugin/common/half.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/CMakeLists.txt b/plugin/common/kernels/CMakeLists.txt index 1f1d4169..f1f6081b 100644 --- a/plugin/common/kernels/CMakeLists.txt +++ b/plugin/common/kernels/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/bboxDeltas2Proposals.cu b/plugin/common/kernels/bboxDeltas2Proposals.cu index 945d3bc5..0be5e90d 100644 --- a/plugin/common/kernels/bboxDeltas2Proposals.cu +++ b/plugin/common/kernels/bboxDeltas2Proposals.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/cropAndResizeKernel.cu b/plugin/common/kernels/cropAndResizeKernel.cu index aa1bec14..fdae167b 100644 --- a/plugin/common/kernels/cropAndResizeKernel.cu +++ b/plugin/common/kernels/cropAndResizeKernel.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/decodeBbox3DKernels.cu b/plugin/common/kernels/decodeBbox3DKernels.cu index ac53c098..f1592e49 100644 --- a/plugin/common/kernels/decodeBbox3DKernels.cu +++ b/plugin/common/kernels/decodeBbox3DKernels.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/detectionForward.cu b/plugin/common/kernels/detectionForward.cu index 09cba7dd..6f28c15a 100644 --- a/plugin/common/kernels/detectionForward.cu +++ b/plugin/common/kernels/detectionForward.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/extractFgScores.cu b/plugin/common/kernels/extractFgScores.cu index f087e012..1785bf0a 100644 --- a/plugin/common/kernels/extractFgScores.cu +++ b/plugin/common/kernels/extractFgScores.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/generateAnchors.cu b/plugin/common/kernels/generateAnchors.cu index 398cf1b7..b80383f7 100644 --- a/plugin/common/kernels/generateAnchors.cu +++ b/plugin/common/kernels/generateAnchors.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/gridAnchorLayer.cu b/plugin/common/kernels/gridAnchorLayer.cu index 666997c5..2475a943 100644 --- a/plugin/common/kernels/gridAnchorLayer.cu +++ b/plugin/common/kernels/gridAnchorLayer.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/kernel.cpp b/plugin/common/kernels/kernel.cpp index 7f8a00dc..d5c0966a 100644 --- a/plugin/common/kernels/kernel.cpp +++ b/plugin/common/kernels/kernel.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/lReLU.cu b/plugin/common/kernels/lReLU.cu index 8a720ff1..87c42724 100644 --- a/plugin/common/kernels/lReLU.cu +++ b/plugin/common/kernels/lReLU.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/maskRCNNKernels.cu b/plugin/common/kernels/maskRCNNKernels.cu index b79d55e0..0a9d8083 100644 --- a/plugin/common/kernels/maskRCNNKernels.cu +++ b/plugin/common/kernels/maskRCNNKernels.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/maskRCNNKernels.h b/plugin/common/kernels/maskRCNNKernels.h index 71ed0784..433d7ca2 100644 --- a/plugin/common/kernels/maskRCNNKernels.h +++ b/plugin/common/kernels/maskRCNNKernels.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/nmsLayer.cu b/plugin/common/kernels/nmsLayer.cu index 0fdcdf39..8ce2a8f2 100644 --- a/plugin/common/kernels/nmsLayer.cu +++ b/plugin/common/kernels/nmsLayer.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/permuteData.cu b/plugin/common/kernels/permuteData.cu index dd43f04c..185e4c53 100644 --- a/plugin/common/kernels/permuteData.cu +++ b/plugin/common/kernels/permuteData.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/pillarScatterKernels.cu b/plugin/common/kernels/pillarScatterKernels.cu index 528a2665..6ee3c3e8 100644 --- a/plugin/common/kernels/pillarScatterKernels.cu +++ b/plugin/common/kernels/pillarScatterKernels.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/priorBoxLayer.cu b/plugin/common/kernels/priorBoxLayer.cu index 3c6e160b..af17af22 100644 --- a/plugin/common/kernels/priorBoxLayer.cu +++ b/plugin/common/kernels/priorBoxLayer.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/proposalKernel.cu b/plugin/common/kernels/proposalKernel.cu index 8fcaab14..82f2db9b 100644 --- a/plugin/common/kernels/proposalKernel.cu +++ b/plugin/common/kernels/proposalKernel.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/proposalsForward.cu b/plugin/common/kernels/proposalsForward.cu index cab00063..2be3a087 100644 --- a/plugin/common/kernels/proposalsForward.cu +++ b/plugin/common/kernels/proposalsForward.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/reducedMathPlugin.h b/plugin/common/kernels/reducedMathPlugin.h index 777a5e51..d7c17f92 100644 --- a/plugin/common/kernels/reducedMathPlugin.h +++ b/plugin/common/kernels/reducedMathPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/regionForward.cu b/plugin/common/kernels/regionForward.cu index a948dc4f..b33b9b3f 100644 --- a/plugin/common/kernels/regionForward.cu +++ b/plugin/common/kernels/regionForward.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/reorgForward.cu b/plugin/common/kernels/reorgForward.cu index becc87a7..ef5fdb7a 100644 --- a/plugin/common/kernels/reorgForward.cu +++ b/plugin/common/kernels/reorgForward.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/roiPooling.cu b/plugin/common/kernels/roiPooling.cu index abac39a2..353173cc 100644 --- a/plugin/common/kernels/roiPooling.cu +++ b/plugin/common/kernels/roiPooling.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/rproiInferenceFused.cu b/plugin/common/kernels/rproiInferenceFused.cu index 46d0243b..db1161bb 100644 --- a/plugin/common/kernels/rproiInferenceFused.cu +++ b/plugin/common/kernels/rproiInferenceFused.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/sortScoresPerClass.cu b/plugin/common/kernels/sortScoresPerClass.cu index 1ac96086..cd62df64 100644 --- a/plugin/common/kernels/sortScoresPerClass.cu +++ b/plugin/common/kernels/sortScoresPerClass.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/sortScoresPerImage.cu b/plugin/common/kernels/sortScoresPerImage.cu index 2137bc09..99749c53 100644 --- a/plugin/common/kernels/sortScoresPerImage.cu +++ b/plugin/common/kernels/sortScoresPerImage.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/kernels/voxelGeneratorKernels.cu b/plugin/common/kernels/voxelGeneratorKernels.cu index 785a7e63..57b71798 100644 --- a/plugin/common/kernels/voxelGeneratorKernels.cu +++ b/plugin/common/kernels/voxelGeneratorKernels.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/mrcnn_config.h b/plugin/common/mrcnn_config.h index 5b3673ca..88added0 100644 --- a/plugin/common/mrcnn_config.h +++ b/plugin/common/mrcnn_config.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/nmsUtils.h b/plugin/common/nmsUtils.h index 28a4aa7e..8dbd03ff 100644 --- a/plugin/common/nmsUtils.h +++ b/plugin/common/nmsUtils.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/reducedMathPlugin.cpp b/plugin/common/reducedMathPlugin.cpp index 4e33680a..bedd8d2b 100644 --- a/plugin/common/reducedMathPlugin.cpp +++ b/plugin/common/reducedMathPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/serialize.hpp b/plugin/common/serialize.hpp index 8a29dd46..8fcef07f 100644 --- a/plugin/common/serialize.hpp +++ b/plugin/common/serialize.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/templates.h b/plugin/common/templates.h index 298bb8c2..2870bfd6 100644 --- a/plugin/common/templates.h +++ b/plugin/common/templates.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/vfcCommon.cpp b/plugin/common/vfcCommon.cpp index 7122d0d4..8664ab56 100644 --- a/plugin/common/vfcCommon.cpp +++ b/plugin/common/vfcCommon.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/common/vfcCommon.h b/plugin/common/vfcCommon.h index ee84dc97..7b7db007 100644 --- a/plugin/common/vfcCommon.h +++ b/plugin/common/vfcCommon.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/coordConvACPlugin/CMakeLists.txt b/plugin/coordConvACPlugin/CMakeLists.txt index df2f2da8..0e7b1e6e 100644 --- a/plugin/coordConvACPlugin/CMakeLists.txt +++ b/plugin/coordConvACPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/coordConvACPlugin/coordConvACPlugin.cpp b/plugin/coordConvACPlugin/coordConvACPlugin.cpp index 63462fcd..671e06ee 100644 --- a/plugin/coordConvACPlugin/coordConvACPlugin.cpp +++ b/plugin/coordConvACPlugin/coordConvACPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/coordConvACPlugin/coordConvACPlugin.h b/plugin/coordConvACPlugin/coordConvACPlugin.h index 1776d6f7..0df045ce 100644 --- a/plugin/coordConvACPlugin/coordConvACPlugin.h +++ b/plugin/coordConvACPlugin/coordConvACPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/coordConvACPlugin/coordConvACPluginKernels.cu b/plugin/coordConvACPlugin/coordConvACPluginKernels.cu index a0130a16..8f32aa87 100644 --- a/plugin/coordConvACPlugin/coordConvACPluginKernels.cu +++ b/plugin/coordConvACPlugin/coordConvACPluginKernels.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/cropAndResizePlugin/CMakeLists.txt b/plugin/cropAndResizePlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/cropAndResizePlugin/CMakeLists.txt +++ b/plugin/cropAndResizePlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/cropAndResizePlugin/cropAndResizePlugin.cpp b/plugin/cropAndResizePlugin/cropAndResizePlugin.cpp index a0b19fc4..f8d5a731 100644 --- a/plugin/cropAndResizePlugin/cropAndResizePlugin.cpp +++ b/plugin/cropAndResizePlugin/cropAndResizePlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/cropAndResizePlugin/cropAndResizePlugin.h b/plugin/cropAndResizePlugin/cropAndResizePlugin.h index 54c8f16b..c0f9d33d 100644 --- a/plugin/cropAndResizePlugin/cropAndResizePlugin.h +++ b/plugin/cropAndResizePlugin/cropAndResizePlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/decodeBbox3DPlugin/CMakeLists.txt b/plugin/decodeBbox3DPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/decodeBbox3DPlugin/CMakeLists.txt +++ b/plugin/decodeBbox3DPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/decodeBbox3DPlugin/decodeBbox3D.cpp b/plugin/decodeBbox3DPlugin/decodeBbox3D.cpp index f9e9faa5..96884a5b 100644 --- a/plugin/decodeBbox3DPlugin/decodeBbox3D.cpp +++ b/plugin/decodeBbox3DPlugin/decodeBbox3D.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/decodeBbox3DPlugin/decodeBbox3D.h b/plugin/decodeBbox3DPlugin/decodeBbox3D.h index ea85785a..65fbb5ae 100644 --- a/plugin/decodeBbox3DPlugin/decodeBbox3D.h +++ b/plugin/decodeBbox3DPlugin/decodeBbox3D.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/detectionLayerPlugin/CMakeLists.txt b/plugin/detectionLayerPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/detectionLayerPlugin/CMakeLists.txt +++ b/plugin/detectionLayerPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/detectionLayerPlugin/detectionLayerPlugin.cpp b/plugin/detectionLayerPlugin/detectionLayerPlugin.cpp index 840156cd..cd243c11 100644 --- a/plugin/detectionLayerPlugin/detectionLayerPlugin.cpp +++ b/plugin/detectionLayerPlugin/detectionLayerPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/detectionLayerPlugin/detectionLayerPlugin.h b/plugin/detectionLayerPlugin/detectionLayerPlugin.h index adbf535d..88ac12f5 100644 --- a/plugin/detectionLayerPlugin/detectionLayerPlugin.h +++ b/plugin/detectionLayerPlugin/detectionLayerPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/disentangledAttentionPlugin/CMakeLists.txt b/plugin/disentangledAttentionPlugin/CMakeLists.txt index df2f2da8..0e7b1e6e 100644 --- a/plugin/disentangledAttentionPlugin/CMakeLists.txt +++ b/plugin/disentangledAttentionPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp b/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp index c79096a5..d9bf788f 100644 --- a/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.h b/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.h index 7d77a514..f9d01a4c 100644 --- a/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.h +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/disentangledAttentionPlugin/disentangledKernel.cu b/plugin/disentangledAttentionPlugin/disentangledKernel.cu index f90a98e6..2636fd8f 100644 --- a/plugin/disentangledAttentionPlugin/disentangledKernel.cu +++ b/plugin/disentangledAttentionPlugin/disentangledKernel.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/efficientNMSPlugin/CMakeLists.txt b/plugin/efficientNMSPlugin/CMakeLists.txt index 1f1d4169..f1f6081b 100644 --- a/plugin/efficientNMSPlugin/CMakeLists.txt +++ b/plugin/efficientNMSPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/efficientNMSPlugin/efficientNMSInference.cu b/plugin/efficientNMSPlugin/efficientNMSInference.cu index ba99cb56..f3eee1a3 100644 --- a/plugin/efficientNMSPlugin/efficientNMSInference.cu +++ b/plugin/efficientNMSPlugin/efficientNMSInference.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/efficientNMSPlugin/efficientNMSInference.cuh b/plugin/efficientNMSPlugin/efficientNMSInference.cuh index bf12c359..c16bdb40 100644 --- a/plugin/efficientNMSPlugin/efficientNMSInference.cuh +++ b/plugin/efficientNMSPlugin/efficientNMSInference.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/efficientNMSPlugin/efficientNMSInference.h b/plugin/efficientNMSPlugin/efficientNMSInference.h index d9ec3192..fa4749bd 100644 --- a/plugin/efficientNMSPlugin/efficientNMSInference.h +++ b/plugin/efficientNMSPlugin/efficientNMSInference.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/efficientNMSPlugin/efficientNMSParameters.h b/plugin/efficientNMSPlugin/efficientNMSParameters.h index 89829089..c4b6dc51 100644 --- a/plugin/efficientNMSPlugin/efficientNMSParameters.h +++ b/plugin/efficientNMSPlugin/efficientNMSParameters.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp b/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp index 1a8692ae..71836943 100644 --- a/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp +++ b/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/efficientNMSPlugin/efficientNMSPlugin.h b/plugin/efficientNMSPlugin/efficientNMSPlugin.h index afceec01..c7248d91 100644 --- a/plugin/efficientNMSPlugin/efficientNMSPlugin.h +++ b/plugin/efficientNMSPlugin/efficientNMSPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/efficientNMSPlugin/tftrt/CMakeLists.txt b/plugin/efficientNMSPlugin/tftrt/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/efficientNMSPlugin/tftrt/CMakeLists.txt +++ b/plugin/efficientNMSPlugin/tftrt/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.cpp b/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.cpp index f5c86365..3aef2fe6 100644 --- a/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.cpp +++ b/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.h b/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.h index e1e98052..2ad7a2f0 100644 --- a/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.h +++ b/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.cpp b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.cpp index 25c8e0ef..af75d75d 100644 --- a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.cpp +++ b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h index 51b09148..58e07289 100644 --- a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h +++ b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/embLayerNormPlugin/CMakeLists.txt b/plugin/embLayerNormPlugin/CMakeLists.txt index f49d60bd..0fbe405b 100644 --- a/plugin/embLayerNormPlugin/CMakeLists.txt +++ b/plugin/embLayerNormPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/embLayerNormPlugin/embLayerNormKernel.cu b/plugin/embLayerNormPlugin/embLayerNormKernel.cu index a32d14e5..6e6707d7 100644 --- a/plugin/embLayerNormPlugin/embLayerNormKernel.cu +++ b/plugin/embLayerNormPlugin/embLayerNormKernel.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/embLayerNormPlugin/embLayerNormPlugin.cpp b/plugin/embLayerNormPlugin/embLayerNormPlugin.cpp index 8e392b82..ab523971 100644 --- a/plugin/embLayerNormPlugin/embLayerNormPlugin.cpp +++ b/plugin/embLayerNormPlugin/embLayerNormPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/embLayerNormPlugin/embLayerNormPlugin.h b/plugin/embLayerNormPlugin/embLayerNormPlugin.h index eb21d268..5eb40958 100644 --- a/plugin/embLayerNormPlugin/embLayerNormPlugin.h +++ b/plugin/embLayerNormPlugin/embLayerNormPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenKernelHFace.cu b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenKernelHFace.cu index db8f6b06..a23f3326 100644 --- a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenKernelHFace.cu +++ b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenKernelHFace.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenKernelMTron.cu b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenKernelMTron.cu index 95e45820..2fddfe02 100644 --- a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenKernelMTron.cu +++ b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenKernelMTron.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.cpp b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.cpp index 4b6bd72d..4313faa7 100644 --- a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.cpp +++ b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.h b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.h index 80a0cc57..d3141a6b 100644 --- a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.h +++ b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/exports-vfc_plugin.def b/plugin/exports-vfc_plugin.def index d47954b3..28a79242 100644 --- a/plugin/exports-vfc_plugin.def +++ b/plugin/exports-vfc_plugin.def @@ -1,4 +1,4 @@ -; SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +; SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. ; SPDX-License-Identifier: Apache-2.0 ; ; Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,7 +13,7 @@ ; See the License for the specific language governing permissions and ; limitations under the License. -LIBRARY nvinfer_vc_plugin +LIBRARY nvinfer_vc_plugin_10 EXPORTS setLoggerFinder getPluginCreators diff --git a/plugin/exports-vfc_plugin.map b/plugin/exports-vfc_plugin.map index b90d58ce..7171544b 100644 --- a/plugin/exports-vfc_plugin.map +++ b/plugin/exports-vfc_plugin.map @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/exports.def b/plugin/exports.def index 6dac36fe..20503473 100644 --- a/plugin/exports.def +++ b/plugin/exports.def @@ -1,4 +1,4 @@ -; SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +; SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. ; SPDX-License-Identifier: Apache-2.0 ; ; Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,7 +13,7 @@ ; See the License for the specific language governing permissions and ; limitations under the License. 
-LIBRARY nvinfer_plugin +LIBRARY nvinfer_plugin_10 EXPORTS getInferLibVersion getPluginRegistry diff --git a/plugin/exports.map b/plugin/exports.map index 64de08ba..b68b1d16 100644 --- a/plugin/exports.map +++ b/plugin/exports.map @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/fcPlugin/CMakeLists.txt b/plugin/fcPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/fcPlugin/CMakeLists.txt +++ b/plugin/fcPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/fcPlugin/fcPlugin.cpp b/plugin/fcPlugin/fcPlugin.cpp index c98ae433..fd0c1339 100644 --- a/plugin/fcPlugin/fcPlugin.cpp +++ b/plugin/fcPlugin/fcPlugin.cpp @@ -140,7 +140,7 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera void const* A, int32_t const& lda, void const* B, int32_t const& ldb, void const* beta, // host pointer void* C, int32_t const& ldc, void* workSpace, size_t workSpaceSize, cublasComputeType_t computeType, cudaDataType_t scaleType, cudaDataType_t Atype, cudaDataType_t Btype, cudaDataType_t Ctype, - std::vector& perfResults) + std::vector& perfResults, cudaStream_t stream) { cublasStatus_t status = CUBLAS_STATUS_SUCCESS; @@ -153,7 +153,6 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera cudaEvent_t startEvent = nullptr; cudaEvent_t stopEvent = nullptr; - cudaStream_t stream = nullptr; 
CublasLtWrapper& cublasLtWrapper = getCublasLtWrapper(); @@ -520,13 +519,20 @@ void FCPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* inputs, int if (mAlgo.data[0] == 0 && memcmp(mAlgo.data, mAlgo.data + 1, sizeof(mAlgo.data) - sizeof(mAlgo.data[0])) == 0) { gLogVerbose << "FCPluginDynamic gemmSearch\n"; + if (mSharedStream == nullptr) + { + SharedStream ss{}; + mSharedStream = static_cast( + getPluginRegistry()->acquirePluginResource(kFCPLUGIN_SHARED_STREAM_KEY, &ss)) + ->mStream; + } if (mType == DataType::kFLOAT) { - mAlgo = gemmSearch(mOutDim, mNmax, mK, kMAX_WORKSPACE_BYTES, actualWorkspace); + mAlgo = gemmSearch(mOutDim, mNmax, mK, kMAX_WORKSPACE_BYTES, actualWorkspace, mSharedStream); } else if (mType == DataType::kHALF) { - mAlgo = gemmSearch(mOutDim, mNmax, mK, kMAX_WORKSPACE_BYTES, actualWorkspace); + mAlgo = gemmSearch(mOutDim, mNmax, mK, kMAX_WORKSPACE_BYTES, actualWorkspace, mSharedStream); } } @@ -656,6 +662,11 @@ int32_t FCPluginDynamic::initialize() noexcept void FCPluginDynamic::terminate() noexcept { gLogVerbose << "FCPluginDynamic terminate\n"; + if (mSharedStream) + { + TRT_UNUSED(getPluginRegistry()->releasePluginResource(kFCPLUGIN_SHARED_STREAM_KEY)); + mSharedStream = nullptr; + } } size_t FCPluginDynamic::getSerializationSize() const noexcept diff --git a/plugin/fcPlugin/fcPlugin.h b/plugin/fcPlugin/fcPlugin.h index 1ba56f7b..855ce96d 100644 --- a/plugin/fcPlugin/fcPlugin.h +++ b/plugin/fcPlugin/fcPlugin.h @@ -31,6 +31,67 @@ namespace nvinfer1 { + +namespace pluginInternal +{ +class SharedStream : public IPluginResource +{ +public: + SharedStream(bool init = false) + { + if (init) + { + PLUGIN_CUASSERT(cudaStreamCreate(&mStream)); + } + } + + void free() + { + if (mStream != nullptr) + { + PLUGIN_CUASSERT(cudaStreamDestroy(mStream)); + mStream = nullptr; + } + } + + int32_t release() noexcept override + { + try + { + free(); + } + catch (std::exception const& e) + { + return -1; + } + return 0; + } + + IPluginResource* clone() 
noexcept override + { + std::unique_ptr cloned{}; + try + { + cloned = std::make_unique(/* init */ true); + } + catch (std::exception const& e) + { + return nullptr; + } + return cloned.release(); + } + + ~SharedStream() override + { + if (mStream) + { + free(); + } + } + + cudaStream_t mStream{nullptr}; +}; +} // namespace pluginInternal namespace plugin { namespace bert @@ -41,6 +102,8 @@ struct GemmTypes { }; +char const* const kFCPLUGIN_SHARED_STREAM_KEY{"fcPlugin_timing_key"}; + template <> struct GemmTypes { @@ -174,11 +237,12 @@ void LtGemmSearch(nvinfer1::pluginInternal::cublasLtHandle_t ltHandle, cudaDataType_t Atype, cudaDataType_t Btype, cudaDataType_t Ctype, - std::vector &perfResults); + std::vector &perfResults, + cudaStream_t stream); // clang-format on template void LtGemmSearch(nvinfer1::pluginInternal::cublasLtHandle_t ltHandle, Gemm const& g, void* workSpace, - size_t workSpaceSize, std::vector& perfResults) + size_t workSpaceSize, std::vector& perfResults, cudaStream_t stream) { // clang-format off LtGemmSearch( @@ -203,7 +267,8 @@ void LtGemmSearch(nvinfer1::pluginInternal::cublasLtHandle_t ltHandle, Gemm c Gemm::Types::cudaTypeI, Gemm::Types::cudaTypeI, Gemm::Types::cudaTypeO, - perfResults + perfResults, + stream ); // clang-format on } @@ -380,29 +445,30 @@ struct AlgoProps }; template -nvinfer1::pluginInternal::cublasLtMatmulAlgo_t gemmSearch( - int32_t const m, int32_t const n, int32_t const k, size_t const workspaceSize, size_t& actualWorkspace) +nvinfer1::pluginInternal::cublasLtMatmulAlgo_t gemmSearch(int32_t const m, int32_t const n, int32_t const k, + size_t const workspaceSize, size_t& actualWorkspace, cudaStream_t& stream) { Gemm g(m, n, k, false, false); std::vector perfResults(kNB_ALGO_COMBINATIONS); - PLUGIN_CUASSERT(cudaMalloc(reinterpret_cast(&g.A), g.bytesA)); - PLUGIN_CUASSERT(cudaMalloc(reinterpret_cast(&g.B), g.bytesB)); - PLUGIN_CUASSERT(cudaMalloc(reinterpret_cast(&g.C), g.bytesC)); + 
PLUGIN_CUASSERT(cudaMallocAsync(reinterpret_cast(&g.A), g.bytesA, stream)); + PLUGIN_CUASSERT(cudaMallocAsync(reinterpret_cast(&g.B), g.bytesB, stream)); + PLUGIN_CUASSERT(cudaMallocAsync(reinterpret_cast(&g.C), g.bytesC, stream)); void* workspace; - PLUGIN_CUASSERT(cudaMalloc(&workspace, workspaceSize)); + PLUGIN_CUASSERT(cudaMallocAsync(&workspace, workspaceSize, stream)); nvinfer1::pluginInternal::cublasLtHandle_t lt; nvinfer1::pluginInternal::CublasLtWrapper& cublasLtWrapper = nvinfer1::pluginInternal::getCublasLtWrapper(); PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtCreate(&lt)); - LtGemmSearch(lt, g, workspace, workspaceSize, perfResults); - PLUGIN_CUASSERT(cudaDeviceSynchronize()); + + LtGemmSearch(lt, g, workspace, workspaceSize, perfResults, stream); + PLUGIN_CUASSERT(cudaStreamSynchronize(stream)); PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtDestroy(lt)); - PLUGIN_CUASSERT(cudaFree(workspace)); + PLUGIN_CUASSERT(cudaFreeAsync(workspace, stream)); - PLUGIN_CUASSERT(cudaFree(g.A)); - PLUGIN_CUASSERT(cudaFree(g.B)); - PLUGIN_CUASSERT(cudaFree(g.C)); + PLUGIN_CUASSERT(cudaFreeAsync(g.A, stream)); + PLUGIN_CUASSERT(cudaFreeAsync(g.B, stream)); + PLUGIN_CUASSERT(cudaFreeAsync(g.C, stream)); actualWorkspace = perfResults[0].workspaceSize; return perfResults[0].algo; @@ -410,27 +476,28 @@ nvinfer1::pluginInternal::cublasLtMatmulAlgo_t gemmSearch( template nvinfer1::pluginInternal::cublasLtMatmulAlgo_t gemmSearch( - Gemm& g, size_t const workspaceSize, size_t& actualWorkspace) + Gemm& g, size_t const workspaceSize, size_t& actualWorkspace, cudaStream_t& stream) { std::vector perfResults(kNB_ALGO_COMBINATIONS); - PLUGIN_CUASSERT(cudaMalloc(&g.A, g.bytesA)); - PLUGIN_CUASSERT(cudaMalloc(&g.B, g.bytesB)); - PLUGIN_CUASSERT(cudaMalloc(&g.C, g.bytesC)); + PLUGIN_CUASSERT(cudaMallocAsync(&g.A, g.bytesA, stream)); + PLUGIN_CUASSERT(cudaMallocAsync(&g.B, g.bytesB, stream)); + PLUGIN_CUASSERT(cudaMallocAsync(&g.C, g.bytesC, stream)); void* workspace; - 
PLUGIN_CUASSERT(cudaMalloc(&workspace, workspaceSize)); + PLUGIN_CUASSERT(cudaMallocAsync(&workspace, workspaceSize, stream)); nvinfer1::pluginInternal::cublasLtHandle_t lt; nvinfer1::pluginInternal::CublasLtWrapper& cublasLtWrapper = nvinfer1::pluginInternal::getCublasLtWrapper(); PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtCreate(&lt)); - LtGemmSearch(lt, g, workspace, workspaceSize, perfResults); - PLUGIN_CUASSERT(cudaDeviceSynchronize()); + + LtGemmSearch(lt, g, workspace, workspaceSize, perfResults, stream); + PLUGIN_CUASSERT(cudaStreamSynchronize(stream)); PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtDestroy(lt)); - PLUGIN_CUASSERT(cudaFree(workspace)); + PLUGIN_CUASSERT(cudaFreeAsync(workspace, stream)); - PLUGIN_CUASSERT(cudaFree(g.A)); - PLUGIN_CUASSERT(cudaFree(g.B)); - PLUGIN_CUASSERT(cudaFree(g.C)); + PLUGIN_CUASSERT(cudaFreeAsync(g.A, stream)); + PLUGIN_CUASSERT(cudaFreeAsync(g.B, stream)); + PLUGIN_CUASSERT(cudaFreeAsync(g.C, stream)); actualWorkspace = perfResults[0].workspaceSize; return perfResults[0].algo; @@ -500,6 +567,7 @@ class FCPluginDynamic : public nvinfer1::IPluginV2DynamicExt bert::cuda_unique_ptr mWdev; LtContext mLtContext; + cudaStream_t mSharedStream{nullptr}; }; class FCPluginDynamicCreator : public nvinfer1::IPluginCreator @@ -527,6 +595,7 @@ class FCPluginDynamicCreator : public nvinfer1::IPluginCreator static std::vector mPluginAttributes; std::string mNamespace; }; + } // namespace bert } // namespace plugin } // namespace nvinfer1 diff --git a/plugin/flattenConcat/CMakeLists.txt b/plugin/flattenConcat/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/flattenConcat/CMakeLists.txt +++ b/plugin/flattenConcat/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/geluPlugin/CMakeLists.txt b/plugin/geluPlugin/CMakeLists.txt index f49d60bd..0fbe405b 100644 --- a/plugin/geluPlugin/CMakeLists.txt +++ b/plugin/geluPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/geluPlugin/geluKernel.cu b/plugin/geluPlugin/geluKernel.cu index 823ae803..fd7f8d54 100644 --- a/plugin/geluPlugin/geluKernel.cu +++ b/plugin/geluPlugin/geluKernel.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/geluPlugin/geluPlugin.cpp b/plugin/geluPlugin/geluPlugin.cpp index ca0d775f..dc6d48f8 100644 --- a/plugin/geluPlugin/geluPlugin.cpp +++ b/plugin/geluPlugin/geluPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/geluPlugin/geluPlugin.h b/plugin/geluPlugin/geluPlugin.h index 14bc0f6a..724d4ee8 100644 --- a/plugin/geluPlugin/geluPlugin.h +++ b/plugin/geluPlugin/geluPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/generateDetectionPlugin/CMakeLists.txt b/plugin/generateDetectionPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/generateDetectionPlugin/CMakeLists.txt +++ b/plugin/generateDetectionPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/generateDetectionPlugin/generateDetectionPlugin.cpp b/plugin/generateDetectionPlugin/generateDetectionPlugin.cpp index 574f2ba2..7c7f5f82 100644 --- a/plugin/generateDetectionPlugin/generateDetectionPlugin.cpp +++ b/plugin/generateDetectionPlugin/generateDetectionPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/generateDetectionPlugin/generateDetectionPlugin.h b/plugin/generateDetectionPlugin/generateDetectionPlugin.h index 75dd50f3..f888f8a7 100644 --- a/plugin/generateDetectionPlugin/generateDetectionPlugin.h +++ b/plugin/generateDetectionPlugin/generateDetectionPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/gridAnchorPlugin/CMakeLists.txt b/plugin/gridAnchorPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/gridAnchorPlugin/CMakeLists.txt +++ b/plugin/gridAnchorPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/groupNormalizationPlugin/CMakeLists.txt b/plugin/groupNormalizationPlugin/CMakeLists.txt index 1f1d4169..f1f6081b 100644 --- a/plugin/groupNormalizationPlugin/CMakeLists.txt +++ b/plugin/groupNormalizationPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/groupNormalizationPlugin/groupNormalizationKernel.cu b/plugin/groupNormalizationPlugin/groupNormalizationKernel.cu index fc051e7f..4ab6dd12 100644 --- a/plugin/groupNormalizationPlugin/groupNormalizationKernel.cu +++ b/plugin/groupNormalizationPlugin/groupNormalizationKernel.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/instanceNormalizationPlugin/CMakeLists.txt b/plugin/instanceNormalizationPlugin/CMakeLists.txt index 1f1d4169..f1f6081b 100644 --- a/plugin/instanceNormalizationPlugin/CMakeLists.txt +++ b/plugin/instanceNormalizationPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/instanceNormalizationPlugin/instanceNormCommon.h b/plugin/instanceNormalizationPlugin/instanceNormCommon.h index fb6f5bd0..938ed2cf 100644 --- a/plugin/instanceNormalizationPlugin/instanceNormCommon.h +++ b/plugin/instanceNormalizationPlugin/instanceNormCommon.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/instanceNormalizationPlugin/instanceNormFwd.h b/plugin/instanceNormalizationPlugin/instanceNormFwd.h index 5f5901bb..1836eb41 100644 --- a/plugin/instanceNormalizationPlugin/instanceNormFwd.h +++ b/plugin/instanceNormalizationPlugin/instanceNormFwd.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/instanceNormalizationPlugin/instanceNormFwdImpl.cu b/plugin/instanceNormalizationPlugin/instanceNormFwdImpl.cu index b79436e7..3bf35f6b 100644 --- a/plugin/instanceNormalizationPlugin/instanceNormFwdImpl.cu +++ b/plugin/instanceNormalizationPlugin/instanceNormFwdImpl.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/leakyReluPlugin/CMakeLists.txt b/plugin/leakyReluPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/leakyReluPlugin/CMakeLists.txt +++ b/plugin/leakyReluPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/leakyReluPlugin/lReluPlugin.cpp b/plugin/leakyReluPlugin/lReluPlugin.cpp index 28148c8b..3acf8f39 100644 --- a/plugin/leakyReluPlugin/lReluPlugin.cpp +++ b/plugin/leakyReluPlugin/lReluPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/leakyReluPlugin/lReluPlugin.h b/plugin/leakyReluPlugin/lReluPlugin.h index 60d81029..087b0a0b 100644 --- a/plugin/leakyReluPlugin/lReluPlugin.h +++ b/plugin/leakyReluPlugin/lReluPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/modulatedDeformConvPlugin/CMakeLists.txt b/plugin/modulatedDeformConvPlugin/CMakeLists.txt index df2f2da8..0e7b1e6e 100644 --- a/plugin/modulatedDeformConvPlugin/CMakeLists.txt +++ b/plugin/modulatedDeformConvPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/modulatedDeformConvPlugin/commonCudaHelper.h b/plugin/modulatedDeformConvPlugin/commonCudaHelper.h index 5466817b..336867cd 100644 --- a/plugin/modulatedDeformConvPlugin/commonCudaHelper.h +++ b/plugin/modulatedDeformConvPlugin/commonCudaHelper.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/multilevelCropAndResizePlugin/CMakeLists.txt b/plugin/multilevelCropAndResizePlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/multilevelCropAndResizePlugin/CMakeLists.txt +++ b/plugin/multilevelCropAndResizePlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.cpp b/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.cpp index 8b8c57f0..6ae3186d 100644 --- a/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.cpp +++ b/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.h b/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.h index c2f615b4..d30df9cf 100644 --- a/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.h +++ b/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/multilevelProposeROI/CMakeLists.txt b/plugin/multilevelProposeROI/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/multilevelProposeROI/CMakeLists.txt +++ b/plugin/multilevelProposeROI/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/multilevelProposeROI/multilevelProposeROIPlugin.cpp b/plugin/multilevelProposeROI/multilevelProposeROIPlugin.cpp index d9ad8add..48d3a359 100644 --- a/plugin/multilevelProposeROI/multilevelProposeROIPlugin.cpp +++ b/plugin/multilevelProposeROI/multilevelProposeROIPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/multilevelProposeROI/multilevelProposeROIPlugin.h b/plugin/multilevelProposeROI/multilevelProposeROIPlugin.h index 653958e6..e384556f 100644 --- a/plugin/multilevelProposeROI/multilevelProposeROIPlugin.h +++ b/plugin/multilevelProposeROI/multilevelProposeROIPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/multilevelProposeROI/tlt_mrcnn_config.h b/plugin/multilevelProposeROI/tlt_mrcnn_config.h index 13c8abfe..d85cc9fd 100644 --- a/plugin/multilevelProposeROI/tlt_mrcnn_config.h +++ b/plugin/multilevelProposeROI/tlt_mrcnn_config.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/multiscaleDeformableAttnPlugin/CMakeLists.txt b/plugin/multiscaleDeformableAttnPlugin/CMakeLists.txt index 1f1d4169..f1f6081b 100644 --- a/plugin/multiscaleDeformableAttnPlugin/CMakeLists.txt +++ b/plugin/multiscaleDeformableAttnPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttn.cu b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttn.cu index d6843c64..648c83fb 100644 --- a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttn.cu +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttn.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttn.h b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttn.h index ba29c0bb..50336389 100644 --- a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttn.h +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttn.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp index 18b763ba..1a87adb0 100644 --- a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableIm2ColCuda.cuh b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableIm2ColCuda.cuh index 370c4cd1..454b9f03 100644 --- a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableIm2ColCuda.cuh +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableIm2ColCuda.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/nmsPlugin/CMakeLists.txt b/plugin/nmsPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/nmsPlugin/CMakeLists.txt +++ b/plugin/nmsPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/nmsPlugin/nmsPlugin.cpp b/plugin/nmsPlugin/nmsPlugin.cpp index 458c184e..e567f8b9 100644 --- a/plugin/nmsPlugin/nmsPlugin.cpp +++ b/plugin/nmsPlugin/nmsPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/nmsPlugin/nmsPlugin.h b/plugin/nmsPlugin/nmsPlugin.h index eccefc37..dc70f2b0 100644 --- a/plugin/nmsPlugin/nmsPlugin.h +++ b/plugin/nmsPlugin/nmsPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/normalizePlugin/CMakeLists.txt b/plugin/normalizePlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/normalizePlugin/CMakeLists.txt +++ b/plugin/normalizePlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/nvFasterRCNN/CMakeLists.txt b/plugin/nvFasterRCNN/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/nvFasterRCNN/CMakeLists.txt +++ b/plugin/nvFasterRCNN/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/pillarScatterPlugin/CMakeLists.txt b/plugin/pillarScatterPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/pillarScatterPlugin/CMakeLists.txt +++ b/plugin/pillarScatterPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/pillarScatterPlugin/pillarScatter.cpp b/plugin/pillarScatterPlugin/pillarScatter.cpp index b520347f..fe47b4c0 100644 --- a/plugin/pillarScatterPlugin/pillarScatter.cpp +++ b/plugin/pillarScatterPlugin/pillarScatter.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/pillarScatterPlugin/pillarScatter.h b/plugin/pillarScatterPlugin/pillarScatter.h index cdaf0454..6a968b08 100644 --- a/plugin/pillarScatterPlugin/pillarScatter.h +++ b/plugin/pillarScatterPlugin/pillarScatter.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/priorBoxPlugin/CMakeLists.txt b/plugin/priorBoxPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/priorBoxPlugin/CMakeLists.txt +++ b/plugin/priorBoxPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/proposalLayerPlugin/CMakeLists.txt b/plugin/proposalLayerPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/proposalLayerPlugin/CMakeLists.txt +++ b/plugin/proposalLayerPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/proposalLayerPlugin/proposalLayerPlugin.cpp b/plugin/proposalLayerPlugin/proposalLayerPlugin.cpp index 1335ea66..b9847495 100644 --- a/plugin/proposalLayerPlugin/proposalLayerPlugin.cpp +++ b/plugin/proposalLayerPlugin/proposalLayerPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/proposalLayerPlugin/proposalLayerPlugin.h b/plugin/proposalLayerPlugin/proposalLayerPlugin.h index 68a0d136..d612db29 100644 --- a/plugin/proposalLayerPlugin/proposalLayerPlugin.h +++ b/plugin/proposalLayerPlugin/proposalLayerPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/proposalPlugin/CMakeLists.txt b/plugin/proposalPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/proposalPlugin/CMakeLists.txt +++ b/plugin/proposalPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/proposalPlugin/proposalPlugin.cpp b/plugin/proposalPlugin/proposalPlugin.cpp index e1bd677b..6a6e48c0 100644 --- a/plugin/proposalPlugin/proposalPlugin.cpp +++ b/plugin/proposalPlugin/proposalPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -514,7 +514,7 @@ void ProposalPlugin::setPluginNamespace(char const* libNamespace) noexcept { try { - PLUGIN_VALIDATE(libNamespace != nullptr); + PLUGIN_VALIDATE(libNamespace != nullptr); mNamespace = libNamespace; } catch (std::exception const& e) @@ -527,7 +527,7 @@ void ProposalDynamicPlugin::setPluginNamespace(char const* libNamespace) noexcep { try { - PLUGIN_VALIDATE(libNamespace != nullptr); + PLUGIN_VALIDATE(libNamespace != nullptr); mNamespace = libNamespace; } catch (std::exception const& e) diff --git a/plugin/proposalPlugin/proposalPlugin.h b/plugin/proposalPlugin/proposalPlugin.h index 05e9508f..025f1dee 100644 --- a/plugin/proposalPlugin/proposalPlugin.h +++ b/plugin/proposalPlugin/proposalPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/pyramidROIAlignPlugin/CMakeLists.txt b/plugin/pyramidROIAlignPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/pyramidROIAlignPlugin/CMakeLists.txt +++ b/plugin/pyramidROIAlignPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.cpp b/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.cpp index 141339f1..e1cf5749 100644 --- a/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.cpp +++ b/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.h b/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.h index dde6a309..9d2cf26e 100644 --- a/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.h +++ b/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/regionPlugin/CMakeLists.txt b/plugin/regionPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/regionPlugin/CMakeLists.txt +++ b/plugin/regionPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/regionPlugin/regionPlugin.cpp b/plugin/regionPlugin/regionPlugin.cpp index c6f709eb..6a140556 100644 --- a/plugin/regionPlugin/regionPlugin.cpp +++ b/plugin/regionPlugin/regionPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/regionPlugin/regionPlugin.h b/plugin/regionPlugin/regionPlugin.h index 7af234f1..66913fc0 100644 --- a/plugin/regionPlugin/regionPlugin.h +++ b/plugin/regionPlugin/regionPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/reorgPlugin/CMakeLists.txt b/plugin/reorgPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/reorgPlugin/CMakeLists.txt +++ b/plugin/reorgPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/reorgPlugin/reorgPlugin.cpp b/plugin/reorgPlugin/reorgPlugin.cpp index 0154580a..227c59d9 100644 --- a/plugin/reorgPlugin/reorgPlugin.cpp +++ b/plugin/reorgPlugin/reorgPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/reorgPlugin/reorgPlugin.h b/plugin/reorgPlugin/reorgPlugin.h index 5971e028..f0e4b2e6 100644 --- a/plugin/reorgPlugin/reorgPlugin.h +++ b/plugin/reorgPlugin/reorgPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/resizeNearestPlugin/CMakeLists.txt b/plugin/resizeNearestPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/resizeNearestPlugin/CMakeLists.txt +++ b/plugin/resizeNearestPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/resizeNearestPlugin/resizeNearestPlugin.cpp b/plugin/resizeNearestPlugin/resizeNearestPlugin.cpp index d60c91fa..75f0b73a 100644 --- a/plugin/resizeNearestPlugin/resizeNearestPlugin.cpp +++ b/plugin/resizeNearestPlugin/resizeNearestPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/resizeNearestPlugin/resizeNearestPlugin.h b/plugin/resizeNearestPlugin/resizeNearestPlugin.h index 5db5fc49..3f9f7e3e 100644 --- a/plugin/resizeNearestPlugin/resizeNearestPlugin.h +++ b/plugin/resizeNearestPlugin/resizeNearestPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/roiAlignPlugin/CMakeLists.txt b/plugin/roiAlignPlugin/CMakeLists.txt index bd8066f0..a2ac13d7 100644 --- a/plugin/roiAlignPlugin/CMakeLists.txt +++ b/plugin/roiAlignPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/roiAlignPlugin/roiAlignKernel.h b/plugin/roiAlignPlugin/roiAlignKernel.h index 890a9822..3be3faaa 100644 --- a/plugin/roiAlignPlugin/roiAlignKernel.h +++ b/plugin/roiAlignPlugin/roiAlignKernel.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/roiAlignPlugin/roiAlignPlugin.cpp b/plugin/roiAlignPlugin/roiAlignPlugin.cpp index d5e51638..5681eff5 100644 --- a/plugin/roiAlignPlugin/roiAlignPlugin.cpp +++ b/plugin/roiAlignPlugin/roiAlignPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/roiAlignPlugin/roiAlignPlugin.h b/plugin/roiAlignPlugin/roiAlignPlugin.h index f1246c83..e22d2571 100644 --- a/plugin/roiAlignPlugin/roiAlignPlugin.h +++ b/plugin/roiAlignPlugin/roiAlignPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/scatterElementsPlugin/CMakeLists.txt b/plugin/scatterElementsPlugin/CMakeLists.txt index 1f1d4169..f1f6081b 100644 --- a/plugin/scatterElementsPlugin/CMakeLists.txt +++ b/plugin/scatterElementsPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/scatterElementsPlugin/TensorInfo.cuh b/plugin/scatterElementsPlugin/TensorInfo.cuh index 0656756c..fd6dd69d 100644 --- a/plugin/scatterElementsPlugin/TensorInfo.cuh +++ b/plugin/scatterElementsPlugin/TensorInfo.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/scatterElementsPlugin/atomics.cuh b/plugin/scatterElementsPlugin/atomics.cuh index 90094c22..19c43e48 100644 --- a/plugin/scatterElementsPlugin/atomics.cuh +++ b/plugin/scatterElementsPlugin/atomics.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/scatterElementsPlugin/reducer.cuh b/plugin/scatterElementsPlugin/reducer.cuh index 7143aa9f..baa13d92 100644 --- a/plugin/scatterElementsPlugin/reducer.cuh +++ b/plugin/scatterElementsPlugin/reducer.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/scatterElementsPlugin/scatterElementsPlugin.cpp b/plugin/scatterElementsPlugin/scatterElementsPlugin.cpp index 7910ad55..babbaecc 100644 --- a/plugin/scatterElementsPlugin/scatterElementsPlugin.cpp +++ b/plugin/scatterElementsPlugin/scatterElementsPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/scatterElementsPlugin/scatterElementsPlugin.h b/plugin/scatterElementsPlugin/scatterElementsPlugin.h index a49c4448..01c2a73d 100644 --- a/plugin/scatterElementsPlugin/scatterElementsPlugin.h +++ b/plugin/scatterElementsPlugin/scatterElementsPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/scatterElementsPlugin/scatterElementsPluginKernel.cu b/plugin/scatterElementsPlugin/scatterElementsPluginKernel.cu index 7f487725..b09db5ae 100644 --- a/plugin/scatterElementsPlugin/scatterElementsPluginKernel.cu +++ b/plugin/scatterElementsPlugin/scatterElementsPluginKernel.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/scatterElementsPlugin/scatterElementsPluginKernel.h b/plugin/scatterElementsPlugin/scatterElementsPluginKernel.h index d7fa1f5a..307ef355 100644 --- a/plugin/scatterElementsPlugin/scatterElementsPluginKernel.h +++ b/plugin/scatterElementsPlugin/scatterElementsPluginKernel.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/scatterPlugin/CMakeLists.txt b/plugin/scatterPlugin/CMakeLists.txt index 1f1d4169..f1f6081b 100644 --- a/plugin/scatterPlugin/CMakeLists.txt +++ b/plugin/scatterPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/scatterPlugin/scatterLayer.cu b/plugin/scatterPlugin/scatterLayer.cu index b7409156..55fdef1f 100644 --- a/plugin/scatterPlugin/scatterLayer.cu +++ b/plugin/scatterPlugin/scatterLayer.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/skipLayerNormPlugin/CMakeLists.txt b/plugin/skipLayerNormPlugin/CMakeLists.txt index f49d60bd..0fbe405b 100644 --- a/plugin/skipLayerNormPlugin/CMakeLists.txt +++ b/plugin/skipLayerNormPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedKernelHFace.cu b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedKernelHFace.cu index 428c7483..b915dfb2 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedKernelHFace.cu +++ b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedKernelHFace.cu @@ -1,6 +1,6 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & * AFFILIATES. All rights reserved. 
SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedKernelMTron.cu b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedKernelMTron.cu index f4c2a39c..7858f3e3 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedKernelMTron.cu +++ b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedKernelMTron.cu @@ -1,6 +1,6 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.cpp b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.cpp index 72061613..1b74f944 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.cpp +++ b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.h b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.h index f12cdda8..e858919b 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.h +++ b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & * AFFILIATES. All rights reserved. 
SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/skipLayerNormPlugin/skipLayerNormKernel.cu b/plugin/skipLayerNormPlugin/skipLayerNormKernel.cu index 5d52c249..da0cee19 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormKernel.cu +++ b/plugin/skipLayerNormPlugin/skipLayerNormKernel.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp b/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp index 04ed5885..c792486b 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp +++ b/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/skipLayerNormPlugin/skipLayerNormPlugin.h b/plugin/skipLayerNormPlugin/skipLayerNormPlugin.h index b9fb8c50..9b1a783a 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormPlugin.h +++ b/plugin/skipLayerNormPlugin/skipLayerNormPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & * AFFILIATES. All rights reserved. 
SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/specialSlicePlugin/CMakeLists.txt b/plugin/specialSlicePlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/specialSlicePlugin/CMakeLists.txt +++ b/plugin/specialSlicePlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/specialSlicePlugin/specialSlicePlugin.cpp b/plugin/specialSlicePlugin/specialSlicePlugin.cpp index f4bdb04c..3730cfcc 100644 --- a/plugin/specialSlicePlugin/specialSlicePlugin.cpp +++ b/plugin/specialSlicePlugin/specialSlicePlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/specialSlicePlugin/specialSlicePlugin.h b/plugin/specialSlicePlugin/specialSlicePlugin.h index 0837682f..710bb8b4 100644 --- a/plugin/specialSlicePlugin/specialSlicePlugin.h +++ b/plugin/specialSlicePlugin/specialSlicePlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/splitPlugin/CMakeLists.txt b/plugin/splitPlugin/CMakeLists.txt index 1f1d4169..f1f6081b 100644 --- a/plugin/splitPlugin/CMakeLists.txt +++ b/plugin/splitPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/splitPlugin/split.cu b/plugin/splitPlugin/split.cu index 771e9cba..0afec432 100644 --- a/plugin/splitPlugin/split.cu +++ b/plugin/splitPlugin/split.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/splitPlugin/split.h b/plugin/splitPlugin/split.h index cc1916bf..2d7a9bd5 100644 --- a/plugin/splitPlugin/split.h +++ b/plugin/splitPlugin/split.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/voxelGeneratorPlugin/CMakeLists.txt b/plugin/voxelGeneratorPlugin/CMakeLists.txt index a240519a..657bfadc 100644 --- a/plugin/voxelGeneratorPlugin/CMakeLists.txt +++ b/plugin/voxelGeneratorPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/voxelGeneratorPlugin/voxelGenerator.cpp b/plugin/voxelGeneratorPlugin/voxelGenerator.cpp index c27d5193..2fff10cc 100644 --- a/plugin/voxelGeneratorPlugin/voxelGenerator.cpp +++ b/plugin/voxelGeneratorPlugin/voxelGenerator.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/voxelGeneratorPlugin/voxelGenerator.h b/plugin/voxelGeneratorPlugin/voxelGenerator.h index fea96877..9bb4f471 100644 --- a/plugin/voxelGeneratorPlugin/voxelGenerator.h +++ b/plugin/voxelGeneratorPlugin/voxelGenerator.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 1494c1fd..66034f8b 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -91,7 +91,7 @@ if (MSVC) find_path(PY_LIB_DIR ${PYTHON_LIB_NAME}.lib HINTS ${WIN_EXTERNALS}/${PYTHON} ${EXT_PATH}/${PYTHON} PATH_SUFFIXES lib) message(STATUS "PY_LIB_DIR: ${PY_LIB_DIR}") else() - find_path(PY_INCLUDE Python.h HINTS ${EXT_PATH}/${PYTHON} PATH_SUFFIXES include) + find_path(PY_INCLUDE Python.h HINTS ${EXT_PATH}/${PYTHON} /usr/include/${PYTHON} PATH_SUFFIXES include) endif() message(STATUS "PY_INCLUDE: ${PY_INCLUDE}") @@ -133,16 +133,6 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GLIBCXX_USE_CXX11_ABI_FLAG} -fvisibility=hidden -std=c++${CPP_STANDARD} -Wno-deprecated-declarations") endif() -# remove md -# Add the flags to enable MD-TRT. -if ("${ENABLE_MDTRT}" STREQUAL "1") - include_directories(${TENSORRT_ROOT}/optimizer) - include_directories(${TENSORRT_ROOT}/runtime) - include_directories(${TENSORRT_ROOT}/common) - include_directories(${TENSORRT_ROOT}/safety) - add_compile_definitions(ENABLE_MDTRT=1) -endif() - # Update linker if(${NV_TARGET_OS} MATCHES "wddm2") if(DEFINED W10_LINKER) @@ -159,12 +149,26 @@ else() set(vfc_suffix "") endif() +if (MSVC) + set(nvinfer_lib_name "nvinfer_${TENSORRT_MAJOR_VERSION}") + set(nvinfer_plugin_lib_name "nvinfer_plugin_${TENSORRT_MAJOR_VERSION}") + set(nvonnxparser_lib_name "nvonnxparser_${TENSORRT_MAJOR_VERSION}") + set(nvinfer_lean_lib_name "nvinfer_lean_${TENSORRT_MAJOR_VERSION}${vfc_suffix}") + set(nvinfer_dispatch_lib_name "nvinfer_dispatch_${TENSORRT_MAJOR_VERSION}${vfc_suffix}") +else() + set(nvinfer_lib_name "nvinfer") + set(nvinfer_plugin_lib_name "nvinfer_plugin") + set(nvonnxparser_lib_name "nvonnxparser") + set(nvinfer_lean_lib_name "nvinfer_lean${vfc_suffix}") + set(nvinfer_dispatch_lib_name "nvinfer_dispatch${vfc_suffix}") +endif() + if (${TENSORRT_MODULE} STREQUAL "tensorrt") - set(TRT_LIBS nvinfer nvonnxparser nvinfer_plugin) + set(TRT_LIBS ${nvinfer_lib_name} ${nvonnxparser_lib_name} 
${nvinfer_plugin_lib_name}) elseif (${TENSORRT_MODULE} STREQUAL "tensorrt_lean") - set(TRT_LIBS "nvinfer_lean${vfc_suffix}") + set(TRT_LIBS ${nvinfer_lean_lib_name}) elseif (${TENSORRT_MODULE} STREQUAL "tensorrt_dispatch") - set(TRT_LIBS "nvinfer_dispatch${vfc_suffix}") + set(TRT_LIBS ${nvinfer_dispatch_lib_name}) else() message(FATAL_ERROR "Unknown TensorRT module " ${TENSORRT_MODULE}) endif() diff --git a/python/docstrings/infer/pyAlgorithmSelectorDoc.h b/python/docstrings/infer/pyAlgorithmSelectorDoc.h index f5814474..78ba454c 100644 --- a/python/docstrings/infer/pyAlgorithmSelectorDoc.h +++ b/python/docstrings/infer/pyAlgorithmSelectorDoc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -60,13 +60,7 @@ constexpr const char* descr = R"trtdoc( :ivar num_inputs: :class:`int` number of inputs of the algorithm. :ivar num_outputs: :class:`int` number of outputs of the algorithm. )trtdoc" -// remove md -#if ENABLE_MDTRT - R"trtdoc( - :ivar instance_id: Read-only. The multi-device instance ID. -)trtdoc" -#endif // ENABLE_MDTRT - ; + ; constexpr const char* get_shape = R"trtdoc( Get the minimum / optimum / maximum dimensions for a dynamic input tensor. diff --git a/python/docstrings/infer/pyCoreDoc.h b/python/docstrings/infer/pyCoreDoc.h index 3586fd9f..d59d6ac0 100644 --- a/python/docstrings/infer/pyCoreDoc.h +++ b/python/docstrings/infer/pyCoreDoc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -407,7 +407,7 @@ constexpr char const* descr = R"trtdoc( :ivar nvtx_verbosity: The NVTX verbosity of the execution context. Building with DETAILED verbosity will generally increase latency in enqueueV3(). Call this method to select NVTX verbosity in this execution context at runtime. The default is the verbosity with which the engine was built, and the verbosity may not be raised above that level. This function does not affect how IEngineInspector interacts with the engine. :ivar temporary_allocator: :class:`IGpuAllocator` The GPU allocator used for internal temporary storage. :ivar weight_streaming_budget: Set and get the current weight streaming budget for inference. The budget may be set to -1 disabling weight streaming at runtime, 0 (default) enabling TRT to choose to weight stream or not, or a positive value in the inclusive range [minimum_weight_streaming_budget, streamable_weights_size - 1]. - :ivar minimum_weight_streaming_budget: Returns the minimum weight streaming budget in bytes required to run the network successfully. The engine must have been built with kWEIGHT_STREAMING. + :ivar minimum_weight_streaming_budget: Returns the minimum weight streaming budget in bytes required to run the network successfully. The engine must have been built with kWEIGHT_STREAMING. :ivar streamable_weights_size: Returns the size of the streamable weights in the engine. This may not include all the weights. )trtdoc"; @@ -731,15 +731,6 @@ constexpr char const* create_execution_context_without_device_memory = R"trtdoc( :returns: An :class:`IExecutionContext` without device memory allocated. )trtdoc"; -constexpr char const* get_profile_shape = R"trtdoc( - Get the minimum/optimum/maximum dimensions for a particular binding under an optimization profile. - - :arg profile_index: The index of the profile. - :arg binding: The binding index or name. 
- - :returns: A ``List[Dims]`` of length 3, containing the minimum, optimum, and maximum shapes, in that order. -)trtdoc"; - constexpr char const* get_tensor_profile_values = R"trtdoc( Get minimum/optimum/maximum values for an input shape binding under an optimization profile. If the specified binding is not an input shape binding, an exception is raised. @@ -882,7 +873,7 @@ To implement a custom output allocator, ensure that you explicitly instantiate t def reallocate_output_async(self, tensor_name, memory, size, alignment, stream): ... # Your implementation here - + def notify_shape(self, tensor_name, shape): ... # Your implementation here @@ -936,7 +927,7 @@ To implement a custom stream reader, ensure that you explicitly instantiate the def __init__(self): trt.IStreamReader.__init__(self) - def read(self, memory, size): + def read(self, size: int) -> bytes: ... # Your implementation here )trtdoc"; @@ -1032,7 +1023,7 @@ constexpr char const* TACTIC_DRAM = R"trtdoc( cudaGetDeviceProperties.embedded is true, and 100% otherwise. )trtdoc"; constexpr char const* TACTIC_SHARED_MEMORY = R"trtdoc( - TACTIC_SHARED_MEMORY defines the maximum shared memory size utilized for executing + TACTIC_SHARED_MEMORY defines the maximum shared memory size utilized for driver reserved and executing the backend CUDA kernel implementation. Adjust this value to restrict tactics that exceed the specified threshold en masse. The default value is device max capability. This value must be less than 1GiB. @@ -1074,7 +1065,7 @@ constexpr char const* NONE = R"trtdoc( Do not require hardware compatibility with GPU architectures other than that of the GPU on which the engine was built. )trtdoc"; constexpr char const* AMPERE_PLUS = R"trtdoc( - Require that the engine is compatible with Ampere and newer GPUs. This will limit the max shared memory usage to + Require that the engine is compatible with Ampere and newer GPUs. 
This will limit the combined usage of driver reserved and backend kernel max shared memory to 48KiB, may reduce the number of available tactics for each layer, and may prevent some fusions from occurring. Thus this can decrease the performance, especially for tf32 models. This option will disable cuDNN, cuBLAS, and cuBLAS LT as tactic sources. @@ -1624,7 +1615,7 @@ constexpr char const* deserialize_cuda_engine = R"trtdoc( constexpr char const* deserialize_cuda_engine_reader = R"trtdoc( Deserialize an :class:`ICudaEngine` from a stream reader. - :arg stream_reader: The :class:`PyStreamReader` that will read the serialized :class:`ICudaEngine`. This enables deserialization from a file directly. + :arg stream_reader: The :class:`PyStreamReader` that will read the serialized :class:`ICudaEngine`. This enables deserialization from a file directly. :returns: The :class:`ICudaEngine`, or None if it could not be deserialized. )trtdoc"; @@ -1794,9 +1785,9 @@ constexpr char const* get_weights_prototype = R"trtdoc( The dtype and size of weights prototype is the same as weights used for engine building. The size of the weights prototype is -1 when the name of the weights is None or does not correspond to any refittable weights. - + :arg weights_name: The name of the weights to be refitted. - + :returns: weights prototype associated with the given name. )trtdoc"; @@ -2033,7 +2024,7 @@ Note that all methods below (allocate, reallocate, deallocate, allocate_async, r constexpr char const* allocate = R"trtdoc( [DEPRECATED] Deprecated in TensorRT 10.0. Please use allocate_async instead. A callback implemented by the application to handle acquisition of GPU memory. - This is just a wrapper around a syncronous method allocate_async passing the default stream. + This is just a wrapper around a synchronous method allocate_async passing the default stream. If an allocation request of size 0 is made, ``None`` should be returned. 
@@ -2052,7 +2043,7 @@ constexpr char const* allocate = R"trtdoc( constexpr char const* deallocate = R"trtdoc( [DEPRECATED] Deprecated in TensorRT 10.0. Please use deallocate_async instead. A callback implemented by the application to handle release of GPU memory. - This is just a wrapper around a syncronous method deallocate_async passing the default stream. + This is just a wrapper around a synchronous method deallocate_async passing the default stream. TensorRT may pass a 0 to this function if it was previously returned by ``allocate()``. diff --git a/python/docstrings/infer/pyFoundationalTypesDoc.h b/python/docstrings/infer/pyFoundationalTypesDoc.h index 0e404631..39ffd53f 100644 --- a/python/docstrings/infer/pyFoundationalTypesDoc.h +++ b/python/docstrings/infer/pyFoundationalTypesDoc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -94,8 +94,8 @@ constexpr const char* init_type = R"trtdoc( constexpr const char* init_ptr = R"trtdoc( Initializes a Weights object with the specified data. - :type: A type to initialize the weights with. - :ptr: A pointer to the data. + :type: A type to initialize the weights with. + :ptr: A pointer to the data. :count: The number of weights. )trtdoc"; @@ -108,7 +108,7 @@ constexpr const char* numpy = R"trtdoc( Create a numpy array using the underlying buffer of this weights object. The resulting array is just a view over the existing data, i.e. no deep copy is made. - If the weights cannot be converted to NumPy (e.g. due to unsupported data type), the original weights are returned. + If the weights cannot be converted to NumPy (e.g. due to unsupported data type), the original weights are returned. 
:returns: The NumPy array or the original weights. )trtdoc"; diff --git a/python/docstrings/infer/pyGraphDoc.h b/python/docstrings/infer/pyGraphDoc.h index 1581ad9c..e9913210 100644 --- a/python/docstrings/infer/pyGraphDoc.h +++ b/python/docstrings/infer/pyGraphDoc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -1341,8 +1341,10 @@ constexpr const char* descr = R"trtdoc( Enumerates bounding box data formats used for the Boxes input tensor in the NMS layer. )trtdoc"; -constexpr const char* CORNER_PAIRS = R"trtdoc((x1, y1, x2, y2) where (x1, y1) and (x2, y2) are any pair of diagonal corners)trtdoc"; -constexpr const char* CENTER_SIZES = R"trtdoc((x_center, y_center, width, height) where (x_center, y_center) is the center point of the box)trtdoc"; +constexpr const char* CORNER_PAIRS + = R"trtdoc((x1, y1, x2, y2) where (x1, y1) and (x2, y2) are any pair of diagonal corners)trtdoc"; +constexpr const char* CENTER_SIZES + = R"trtdoc((x_center, y_center, width, height) where (x_center, y_center) is the center point of the box)trtdoc"; } // namespace BoundingBoxFormatDoc @@ -1422,7 +1424,6 @@ constexpr const char* set_input = R"trtdoc( } // namespace INMSLayerDoc - namespace FillOperationDoc { constexpr const char* descr = R"trtdoc(The tensor fill operations that may performed by an Fill layer.)trtdoc"; diff --git a/python/docstrings/infer/pyInt8Doc.h b/python/docstrings/infer/pyInt8Doc.h index 91b635fe..013c6c75 100644 --- a/python/docstrings/infer/pyInt8Doc.h +++ b/python/docstrings/infer/pyInt8Doc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/python/docstrings/infer/pyPluginDoc.h b/python/docstrings/infer/pyPluginDoc.h index 5df97568..f541a281 100644 --- a/python/docstrings/infer/pyPluginDoc.h +++ b/python/docstrings/infer/pyPluginDoc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -183,7 +183,6 @@ constexpr const char* detach_from_context = R"trtdoc( )trtdoc"; } // namespace IPluginV2ExtDoc - namespace IPluginV2DynamicExtDoc { constexpr const char* descr = R"trtdoc( @@ -194,7 +193,7 @@ constexpr const char* descr = R"trtdoc( Similar to `IPluginV2Ext` (including capability to support different output data types), but with support for dynamic shapes. This class is made available for the purpose of implementing `IPluginV2DynamicExt` plugins with Python. Inherited - Python->C++ bindings from `IPluginV2` and `IPluginV2Ext` will continue to work on C++-based `IPluginV2DynamicExt` plugins. + Python->C++ bindings from `IPluginV2` and `IPluginV2Ext` will continue to work on C++-based `IPluginV2DynamicExt` plugins. .. note:: Every attribute except `tensorrt_version` must be explicitly initialized on Python-based plugins. Except `plugin_namespace`, @@ -212,22 +211,22 @@ constexpr const char* initialize = R"trtdoc( Initialize the plugin for execution. This is called when the engine is created. .. note:: - When implementing a Python-based plugin, implementing this method is optional. The default behavior is equivalent to `pass`. 
+ When implementing a Python-based plugin, implementing this method is optional. The default behavior is equivalent to `pass`. .. warning:: In contrast to the C++ API for `initialize()`, this method must not return an error code. The expected behavior is to throw an appropriate exception - if an error occurs. + if an error occurs. .. warning:: This `initialize()` method is not available to be called from Python on C++-based plugins. - + )trtdoc"; constexpr const char* terminate = R"trtdoc( Release resources acquired during plugin layer initialization. This is called when the engine is destroyed. .. note:: - When implementing a Python-based plugin, implementing this method is optional. The default behavior is equivalent to `pass`. + When implementing a Python-based plugin, implementing this method is optional. The default behavior is equivalent to `pass`. )trtdoc"; @@ -238,7 +237,7 @@ constexpr const char* get_output_dimensions = R"trtdoc( This function is called by the implementations of `IBuilder` during analysis of the network. .. warning:: - This `get_output_dimensions()` method is not available to be called from Python on C++-based plugins + This `get_output_dimensions()` method is not available to be called from Python on C++-based plugins :arg output_index: The index of the output tensor :arg inputs: Expressions for dimensions of the input tensors @@ -269,7 +268,7 @@ constexpr const char* configure_plugin = R"trtdoc( Execution phase: `configure_plugin()` is called when a plugin is being prepared for executing the plugin for specific dimensions. This provides an opportunity for the plugin to change algorithmic choices based on the explicit input dimensions stored in `desc.dims` field. .. warning:: - This `configure_plugin()` method is not available to be called from Python on C++-based plugins + This `configure_plugin()` method is not available to be called from Python on C++-based plugins :arg in: The input tensors attributes that are used for configuration. 
:arg out: The output tensors attributes that are used for configuration. @@ -299,10 +298,10 @@ constexpr const char* get_workspace_size = R"trtdoc( This function is called after the plugin is configured, and possibly during execution. The result should be a sufficient workspace size to deal with inputs and outputs of the given size or any smaller problem. .. note:: - When implementing a Python-based plugin, implementing this method is optional. The default behavior is equivalent to `return 0`. + When implementing a Python-based plugin, implementing this method is optional. The default behavior is equivalent to `return 0`. .. warning:: - This `get_workspace_size()` method is not available to be called from Python on C++-based plugins + This `get_workspace_size()` method is not available to be called from Python on C++-based plugins :arg input_desc: How to interpret the memory for the input tensors. :arg output_desc: How to interpret the memory for the output tensors. @@ -314,7 +313,7 @@ constexpr const char* destroy = R"trtdoc( Destroy the plugin object. This will be called when the :class:`INetworkDefinition` , :class:`Builder` or :class:`ICudaEngine` is destroyed. .. note:: - When implementing a Python-based plugin, implementing this method is optional. The default behavior is a `pass`. + When implementing a Python-based plugin, implementing this method is optional. The default behavior is a `pass`. )trtdoc"; @@ -322,13 +321,13 @@ constexpr const char* enqueue = R"trtdoc( Execute the layer. `inputs` and `outputs` contains pointers to the corresponding input and output device buffers as their `intptr_t` casts. `stream` also represents an `intptr_t` cast of the CUDA stream in which enqueue should be executed. - + .. warning:: Since input, output, and workspace buffers are created and owned by TRT, care must be taken when writing to them from the Python side. .. warning:: In contrast to the C++ API for `enqueue()`, this method must not return an error code. 
The expected behavior is to throw an appropriate exception. - if an error occurs. + if an error occurs. .. warning:: This `enqueue()` method is not available to be called from Python on C++-based plugins. @@ -345,7 +344,7 @@ constexpr const char* enqueue = R"trtdoc( constexpr const char* clone = R"trtdoc( Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin object with these parameters. - If the source plugin is pre-configured with `configure_plugin()`, the returned object should also be pre-configured. + If the source plugin is pre-configured with `configure_plugin()`, the returned object should also be pre-configured. Cloned plugin objects can share the same per-engine immutable resource (e.g. weights) with the source object to avoid duplication. )trtdoc"; @@ -353,7 +352,7 @@ constexpr const char* get_serialization_size = R"trtdoc( Return the serialization size (in bytes) required by the plugin. .. note:: - When implementing a Python-based plugin, implementing this method is optional. The default behavior is equivalent to `return len(serialize())`. + When implementing a Python-based plugin, implementing this method is optional. The default behavior is equivalent to `return len(serialize())`. )trtdoc"; @@ -392,7 +391,7 @@ constexpr const char* ipluginv3_descr = R"trtdoc( constexpr const char* iplugincapability_descr = R"trtdoc( Base class for plugin capability interfaces - + IPluginCapability represents a split in TensorRT V3 plugins to sub-objects that expose different types of capabilites a plugin may have, as opposed to a single interface which defines all capabilities and behaviors of a plugin. )trtdoc"; @@ -411,7 +410,7 @@ constexpr const char* ipluginv3onecore_descr = R"trtdoc( constexpr const char* ipluginv3onebuild_descr = R"trtdoc( A plugin capability interface that enables the build capability (PluginCapabilityType.BUILD). 
- + Exposes methods that allow the expression of the build time properties and behavior of a plugin. .. note:: @@ -423,7 +422,7 @@ constexpr const char* ipluginv3onebuild_descr = R"trtdoc( constexpr const char* ipluginv3oneruntime_descr = R"trtdoc( A plugin capability interface that enables the runtime capability (PluginCapabilityType.RUNTIME). - + Exposes methods that allow the expression of the runtime properties and behavior of a plugin. )trtdoc"; @@ -434,7 +433,7 @@ constexpr const char* get_output_shapes = R"trtdoc( This function is called by the implementations of `IBuilder` during analysis of the network. .. warning:: - This `get_output_shapes()` method is not available to be called from Python on C++-based plugins + This get_output_shapes() method is not available to be called from Python on C++-based plugins :arg inputs: Expressions for shapes of the input tensors :arg shape_inputs: Expressions for shapes of the shape inputs @@ -445,9 +444,9 @@ constexpr const char* get_output_shapes = R"trtdoc( constexpr const char* get_output_data_types = R"trtdoc( - Return `DataType`s of the plugin outputs. + Return `DataType` s of the plugin outputs. - Provide `DataType.FLOAT`s if the layer has no inputs. The data type for any size tensor outputs must be + Provide `DataType.FLOAT` s if the layer has no inputs. The data type for any size tensor outputs must be `DataType.INT32`. The returned data types must each have a format that is supported by the plugin. :arg input_types: Data types of the inputs. @@ -458,7 +457,7 @@ constexpr const char* get_output_data_types = R"trtdoc( constexpr const char* configure_plugin = R"trtdoc( Configure the plugin. - This function can be called multiple times in the build phase during creation of an engine by IBuilder. + This function can be called multiple times in the build phase during creation of an engine by IBuilder. 
Build phase: `configure_plugin()` is called when a plugin is being prepared for profiling but not for any specific input size. This provides an opportunity for the plugin to make algorithmic choices on the basis of input and output formats, along with the bound of possible dimensions. The min, opt and max value of the `DynamicPluginTensorDesc` correspond to the `MIN`, `OPT` and `MAX` value of the current profile that the plugin is @@ -467,31 +466,28 @@ constexpr const char* configure_plugin = R"trtdoc( .. warning:: In contrast to the C++ API for `configurePlugin()`, this method must not return an error code. The expected behavior is to throw an appropriate exception - if an error occurs. + if an error occurs. .. warning:: - This `configure_plugin()` method is not available to be called from Python on C++-based plugins + This `configure_plugin()` method is not available to be called from Python on C++-based plugins :arg in: The input tensors attributes that are used for configuration. :arg out: The output tensors attributes that are used for configuration. )trtdoc"; constexpr const char* on_shape_change = R"trtdoc( - Called when a plugin is being prepared for execution for specific dimensions. This could happen multiple times in the execution phase, both during creation of an engine by IBuilder and execution of an - engine by IExecutionContext. + Called when a plugin is being prepared for execution for specific dimensions. This could happen multiple times in the execution phase, both during creation of an engine by IBuilder and execution of an + engine by IExecutionContext. - * IBuilder will call this function once per profile, with `in` resolved to the values specified by the - kOPT field of the current profile. - * IExecutionContext will call this during the next subsequent instance of enqueue_v2() or execute_v3() if: - - The optimization profile is changed. - - An input binding is changed. 
+ * IBuilder will call this function once per profile, with `in` resolved to the values specified by the kOPT field of the current profile. + * IExecutionContext will call this during the next subsequent instance of enqueue_v2() or execute_v3() if: (1) The optimization profile is changed. (2) An input binding is changed. .. warning:: In contrast to the C++ API for `onShapeChange()`, this method must not return an error code. The expected behavior is to throw an appropriate exception - if an error occurs. + if an error occurs. .. warning:: - This `on_shape_change()` method is not available to be called from Python on C++-based plugins + This `on_shape_change()` method is not available to be called from Python on C++-based plugins :arg in: The input tensors attributes that are used for configuration. :arg out: The output tensors attributes that are used for configuration. @@ -521,10 +517,10 @@ constexpr const char* get_workspace_size = R"trtdoc( This function is called after the plugin is configured, and possibly during execution. The result should be a sufficient workspace size to deal with inputs and outputs of the given size or any smaller problem. .. note:: - When implementing a Python-based plugin, implementing this method is optional. The default behavior is equivalent to `return 0`. + When implementing a Python-based plugin, implementing this method is optional. The default behavior is equivalent to `return 0`. .. warning:: - This `get_workspace_size()` method is not available to be called from Python on C++-based plugins + This `get_workspace_size()` method is not available to be called from Python on C++-based plugins :arg input_desc: How to interpret the memory for the input tensors. :arg output_desc: How to interpret the memory for the output tensors. @@ -539,7 +535,7 @@ constexpr const char* destroy = R"trtdoc( There is no direct equivalent to this method in the C++ API. .. note:: - Implementing this method is optional. The default behavior is a `pass`.
+ Implementing this method is optional. The default behavior is a `pass`. )trtdoc"; @@ -547,13 +543,13 @@ constexpr const char* enqueue = R"trtdoc( Execute the layer. `inputs` and `outputs` contains pointers to the corresponding input and output device buffers as their `intptr_t` casts. `stream` also represents an `intptr_t` cast of the CUDA stream in which enqueue should be executed. - + .. warning:: Since input, output, and workspace buffers are created and owned by TRT, care must be taken when writing to them from the Python side. .. warning:: In contrast to the C++ API for `enqueue()`, this method must not return an error code. The expected behavior is to throw an appropriate exception. - if an error occurs. + if an error occurs. .. warning:: This `enqueue()` method is not available to be called from Python on C++-based plugins. @@ -580,7 +576,7 @@ constexpr const char* get_capability_interface = R"trtdoc( constexpr const char* clone = R"trtdoc( Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin object with these parameters. - If the source plugin is pre-configured with `configure_plugin()`, the returned object should also be pre-configured. + If the source plugin is pre-configured with `configure_plugin()`, the returned object should also be pre-configured. Cloned plugin objects can share the same per-engine immutable resource (e.g. weights) with the source object to avoid duplication. )trtdoc"; @@ -602,7 +598,7 @@ constexpr const char* set_tactic = R"trtdoc( .. warning:: In contrast to the C++ API for `setTactic()`, this method must not return an error code. The expected behavior is to throw an appropriate exception - if an error occurs. + if an error occurs. .. warning:: This `set_tactic()` method is not available to be called from Python on C++-based plugins. 
@@ -611,7 +607,7 @@ constexpr const char* set_tactic = R"trtdoc( constexpr const char* get_valid_tactics = R"trtdoc( Return any custom tactics that the plugin intends to use. - + .. note:: The provided tactic values must be unique and positive @@ -626,9 +622,9 @@ constexpr const char* attach_to_context = R"trtdoc( This function is called automatically for each plugin when a new execution context is created. The plugin may use resources provided by the resource_context until the plugin is deleted by TensorRT. - + :arg resource_context: A resource context that exposes methods to get access to execution context specific resources. A different resource context is guaranteed for each different execution context to which the plugin is attached. - + .. note:: This method should clone the entire IPluginV3 object, not just the runtime interface @@ -660,7 +656,7 @@ constexpr const char* release = R"trtdoc( constexpr const char* clone = R"trtdoc( Resource initialization (if any) may be skipped for non-cloned objects since only clones will be registered by TensorRT. - + )trtdoc"; } // namespace IPluginResourceDoc @@ -703,7 +699,7 @@ namespace IDimensionExprDoc { constexpr const char* descr = R"trtdoc( An `IDimensionExpr` represents an integer expression constructed from constants, input dimensions, and binary operations. - + These expressions are can be used in overrides of `IPluginV2DynamicExt::get_output_dimensions()` to define output dimensions in terms of input dimensions. )trtdoc"; @@ -787,7 +783,7 @@ namespace IPluginResourceContextDoc { constexpr const char* descr = R"trtdoc( Interface for plugins to access per context resources provided by TensorRT - + There is no public way to construct an IPluginResourceContext. It appears as an argument to trt.IPluginV3OneRuntime.attach_to_context(). )trtdoc"; } // namespace IPluginResourceContextDoc @@ -953,7 +949,7 @@ constexpr const char* get_plugin_creator = R"trtdoc( Return plugin creator based on type, version and namespace .. 
warning:: - Returns None if a plugin creator with matching name, version, and namespace is found, but is not a + Returns None if a plugin creator with matching name, version, and namespace is found, but is not a descendent of IPluginCreator :arg type: The type of the plugin. @@ -998,12 +994,12 @@ constexpr const char* deregister_library = R"trtdoc( constexpr const char* acquire_plugin_resource = R"trtdoc( Get a handle to a plugin resource registered against the provided key. - :arg: key: Key for identifying the resource. + :arg: key: Key for identifying the resource. :arg: resource: A plugin resource object. The object will only need to be valid until this method returns, as only a clone of this object will be registered by TRT. Cannot be null. )trtdoc"; constexpr const char* release_plugin_resource = R"trtdoc( - Decrement reference count for the resource with this key. If reference count goes to zero after decrement, release() will be invoked on the resource, + Decrement reference count for the resource with this key. If reference count goes to zero after decrement, release() will be invoked on the resource, and the key will be deregistered. :arg: key: Key that was used to register the resource. diff --git a/python/docstrings/parsers/pyOnnxDoc.h b/python/docstrings/parsers/pyOnnxDoc.h index 7099a207..17656d27 100644 --- a/python/docstrings/parsers/pyOnnxDoc.h +++ b/python/docstrings/parsers/pyOnnxDoc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/python/docstrings/pyTensorRTDoc.h b/python/docstrings/pyTensorRTDoc.h index 2ebb0a82..3594d387 100644 --- a/python/docstrings/pyTensorRTDoc.h +++ b/python/docstrings/pyTensorRTDoc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/python/include/ForwardDeclarations.h b/python/include/ForwardDeclarations.h index c377bf66..d4bed446 100644 --- a/python/include/ForwardDeclarations.h +++ b/python/include/ForwardDeclarations.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/python/include/utils.h b/python/include/utils.h index 0b46743a..2f0d5bdc 100644 --- a/python/include/utils.h +++ b/python/include/utils.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -162,7 +162,7 @@ void throwPyError(PyObject* type, std::string const& message = "python error"); { \ utils::throwPyError(PyExc_IndexError, "Out of bounds"); \ } \ - }while(false) + } while (false) #define PY_ASSERT_VALUE_ERROR(assertion, msg) \ do \ diff --git a/python/packaging/bindings_wheel/setup.py b/python/packaging/bindings_wheel/setup.py index 7bd97517..32b9a730 100644 --- a/python/packaging/bindings_wheel/setup.py +++ b/python/packaging/bindings_wheel/setup.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/python/packaging/bindings_wheel/tensorrt/__init__.py b/python/packaging/bindings_wheel/tensorrt/__init__.py index 01e49480..e82ee1ec 100644 --- a/python/packaging/bindings_wheel/tensorrt/__init__.py +++ b/python/packaging/bindings_wheel/tensorrt/__init__.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -51,18 +51,18 @@ def find_lib(name): # Order matters here because of dependencies LIBRARIES = { "tensorrt": [ - "nvinfer.dll", + "nvinfer_##TENSORRT_MAJOR##.dll", "cublas64_##CUDA_MAJOR##.dll", "cublasLt64_##CUDA_MAJOR##.dll", "cudnn64_##CUDNN_MAJOR##.dll", - "nvinfer_plugin.dll", - "nvonnxparser.dll", + "nvinfer_plugin_##TENSORRT_MAJOR##.dll", + "nvonnxparser_##TENSORRT_MAJOR##.dll", ], "tensorrt_dispatch": [ - "nvinfer_dispatch.dll", + "nvinfer_dispatch_##TENSORRT_MAJOR##.dll", ], "tensorrt_lean": [ - "nvinfer_lean.dll", + "nvinfer_lean_##TENSORRT_MAJOR##.dll", ], }["##TENSORRT_MODULE##"] diff --git a/python/packaging/frontend_sdist/setup.py b/python/packaging/frontend_sdist/setup.py index b593e52c..8c050d20 100644 --- a/python/packaging/frontend_sdist/setup.py +++ b/python/packaging/frontend_sdist/setup.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -104,14 +104,20 @@ def parent_command_line(): pass # fall back to shell try: - return subprocess.check_output(["ps", "-p", str(pid), "-o", "command", "--no-headers"]).decode() + return subprocess.check_output( + ["ps", "-p", str(pid), "-o", "command", "--no-headers"] + ).decode() except: return "" # use pip-inside-pip hack only if the nvidia index is not set in the environment install_requires = [] -if disable_internal_pip or nvidia_pip_index_url in parent_command_line() or nvidia_pip_index_url in pip_config_list(): +if ( + disable_internal_pip + or nvidia_pip_index_url in parent_command_line() + or nvidia_pip_index_url in pip_config_list() +): install_requires.extend(tensorrt_submodules) cmdclass = {} else: diff --git a/python/packaging/frontend_sdist/tensorrt/__init__.py b/python/packaging/frontend_sdist/tensorrt/__init__.py index d15c89d7..5b7038fd 100644 --- a/python/packaging/frontend_sdist/tensorrt/__init__.py +++ b/python/packaging/frontend_sdist/tensorrt/__init__.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/python/packaging/libs_wheel/setup.py b/python/packaging/libs_wheel/setup.py index b6060e0b..b9f7af76 100644 --- a/python/packaging/libs_wheel/setup.py +++ b/python/packaging/libs_wheel/setup.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/python/packaging/libs_wheel/tensorrt_libs/__init__.py b/python/packaging/libs_wheel/tensorrt_libs/__init__.py index a7d9e91a..0335c921 100644 --- a/python/packaging/libs_wheel/tensorrt_libs/__init__.py +++ b/python/packaging/libs_wheel/tensorrt_libs/__init__.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -43,7 +43,12 @@ def try_load_libs_from_dir(path): ] for dep_path in DEPENDENCY_PATHS: try_load_libs_from_dir( - os.path.join(CURDIR, os.path.pardir, dep_path, "bin" if sys.platform.startswith("win") else "lib") + os.path.join( + CURDIR, + os.path.pardir, + dep_path, + "bin" if sys.platform.startswith("win") else "lib", + ) ) diff --git a/python/packaging/metapackage/setup.py b/python/packaging/metapackage/setup.py index b5f8452f..bd673247 100644 --- a/python/packaging/metapackage/setup.py +++ b/python/packaging/metapackage/setup.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/python/src/infer/pyAlgorithmSelector.cpp b/python/src/infer/pyAlgorithmSelector.cpp index 75fe97d2..81984930 100644 --- a/python/src/infer/pyAlgorithmSelector.cpp +++ b/python/src/infer/pyAlgorithmSelector.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,10 +19,6 @@ #include "ForwardDeclarations.h" #include "utils.h" #include -// remove md -#if ENABLE_MDTRT -#include "api/internal.h" -#endif // ENABLE_MDTRT #include "infer/pyAlgorithmSelectorDoc.h" #include #include @@ -167,11 +163,7 @@ void bindAlgorithm(py::module& m) .def("get_shape", lambdas::get_shape, "index"_a, IAlgorithmContextDoc::get_shape) .def_property_readonly("num_inputs", &IAlgorithmContext::getNbInputs) .def_property_readonly("num_outputs", &IAlgorithmContext::getNbOutputs) -// remove md -#if ENABLE_MDTRT - .def_property_readonly("instance_id", &nvinfer1AlgorithmGetInstanceID) -#endif // ENABLE_MDTRT - ; + ; // IAlgorithm py::class_>( diff --git a/python/src/infer/pyCore.cpp b/python/src/infer/pyCore.cpp index e2d95473..4d6f72e0 100644 --- a/python/src/infer/pyCore.cpp +++ b/python/src/infer/pyCore.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -55,16 +55,16 @@ static const auto opt_profile_get_shape return shapes; }; -static const auto opt_profile_set_shape_input - = [](IOptimizationProfile& self, std::string const& inputName, std::vector const& min, - std::vector const& opt, std::vector const& max) { - PY_ASSERT_RUNTIME_ERROR(self.setShapeValues(inputName.c_str(), OptProfileSelector::kMIN, min.data(), min.size()), - "min input provided for shape tensor is inconsistent with other inputs."); - PY_ASSERT_RUNTIME_ERROR(self.setShapeValues(inputName.c_str(), OptProfileSelector::kOPT, opt.data(), opt.size()), - "opt input provided for shape tensor is inconsistent with other inputs."); - PY_ASSERT_RUNTIME_ERROR(self.setShapeValues(inputName.c_str(), OptProfileSelector::kMAX, max.data(), max.size()), - "max input provided for shape tensor is inconsistent with other inputs."); - }; +static const auto opt_profile_set_shape_input = [](IOptimizationProfile& self, std::string const& inputName, + std::vector const& min, std::vector const& opt, + std::vector const& max) { + PY_ASSERT_RUNTIME_ERROR(self.setShapeValues(inputName.c_str(), OptProfileSelector::kMIN, min.data(), min.size()), + "min input provided for shape tensor is inconsistent with other inputs."); + PY_ASSERT_RUNTIME_ERROR(self.setShapeValues(inputName.c_str(), OptProfileSelector::kOPT, opt.data(), opt.size()), + "opt input provided for shape tensor is inconsistent with other inputs."); + PY_ASSERT_RUNTIME_ERROR(self.setShapeValues(inputName.c_str(), OptProfileSelector::kMAX, max.data(), max.size()), + "max input provided for shape tensor is inconsistent with other inputs."); +}; static const auto opt_profile_get_shape_input = [](IOptimizationProfile& self, std::string const& inputName) -> std::vector> { @@ -144,7 +144,8 @@ Dims castDimsFromPyIterable(PyIterable& in) int32_t const maxDims{static_cast(Dims::MAX_DIMS)}; Dims dims{}; dims.nbDims = 
py::len(in); - PY_ASSERT_RUNTIME_ERROR(dims.nbDims <= maxDims, "The number of input dims exceeds the maximum allowed number of dimensions"); + PY_ASSERT_RUNTIME_ERROR( + dims.nbDims <= maxDims, "The number of input dims exceeds the maximum allowed number of dimensions"); for (int32_t i = 0; i < dims.nbDims; ++i) { dims.d[i] = in[i].template cast(); @@ -182,21 +183,6 @@ std::vector get_tensor_profile_shape(ICudaEngine& self, std::string const& return shapes; }; -std::vector engine_get_profile_shape(ICudaEngine& self, int32_t profileIndex, int32_t bindingIndex) -{ - std::vector shapes{}; - auto const tensorName = self.getIOTensorName(bindingIndex); - shapes.emplace_back(self.getProfileShape(tensorName, profileIndex, OptProfileSelector::kMIN)); - shapes.emplace_back(self.getProfileShape(tensorName, profileIndex, OptProfileSelector::kOPT)); - shapes.emplace_back(self.getProfileShape(tensorName, profileIndex, OptProfileSelector::kMAX)); - return shapes; -}; -// Overload to allow using binding names instead of indices. 
-std::vector engine_get_profile_shape_str(ICudaEngine& self, int32_t profileIndex, std::string const& bindingName) -{ - return get_tensor_profile_shape(self, bindingName, profileIndex); -}; - std::vector> get_tensor_profile_values( ICudaEngine& self, int32_t profileIndex, std::string const& tensorName) { @@ -618,8 +604,11 @@ class PyStreamReader : public IStreamReader return 0; } - py::object bytesRead = pyFunc(reinterpret_cast(destination), size); - return bytesRead.cast(); + py::buffer data = pyFunc(size); + py::buffer_info info = data.request(); + int64_t bytesRead = info.size * info.itemsize; + std::memcpy(destination, info.ptr, std::min(bytesRead, size)); + return bytesRead; } catch (std::exception const& e) { @@ -1180,10 +1169,6 @@ void bindCore(py::module& m) .def_property_readonly("name", &ICudaEngine::getName) .def_property_readonly("num_optimization_profiles", &ICudaEngine::getNbOptimizationProfiles) .def_property_readonly("engine_capability", &ICudaEngine::getEngineCapability) - .def("get_profile_shape", utils::deprecate(lambdas::engine_get_profile_shape, "get_tensor_profile_shape"), - "profile_index"_a, "binding"_a, ICudaEngineDoc::get_profile_shape) - .def("get_profile_shape", utils::deprecate(lambdas::engine_get_profile_shape_str, "get_tensor_profile_shape"), - "profile_index"_a, "binding"_a, ICudaEngineDoc::get_profile_shape) // Start of enqueueV3 related APIs. 
.def_property_readonly("num_io_tensors", &ICudaEngine::getNbIOTensors) .def("get_tensor_name", &ICudaEngine::getIOTensorName, "index"_a, ICudaEngineDoc::get_tensor_name) @@ -1278,7 +1263,7 @@ void bindCore(py::module& m) .def_property_readonly("minimum_weight_streaming_budget", &ICudaEngine::getMinimumWeightStreamingBudget) .def_property_readonly("streamable_weights_size", &ICudaEngine::getStreamableWeightsSize) .def("is_debug_tensor", &ICudaEngine::isDebugTensor, "name"_a, ICudaEngineDoc::is_debug_tensor) - .def("__del__", &utils::doNothingDel); + .def("__del__", &utils::doNothingDel); py::enum_(m, "AllocatorFlag", py::arithmetic{}, AllocatorFlagDoc::descr, py::module_local()) .value("RESIZABLE", AllocatorFlag::kRESIZABLE, AllocatorFlagDoc::RESIZABLE); diff --git a/python/src/infer/pyFoundationalTypes.cpp b/python/src/infer/pyFoundationalTypes.cpp index e89e020a..6f64f7d4 100644 --- a/python/src/infer/pyFoundationalTypes.cpp +++ b/python/src/infer/pyFoundationalTypes.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,8 +40,8 @@ static const auto weights_pointer_constructor = [](DataType const& type, size_t static const auto weights_numpy_constructor = [](py::array& arr) { arr = py::array::ensure(arr); // In order to construct a weights object, we must have a contiguous C-style array. - PY_ASSERT_VALUE_ERROR(arr, - "Could not convert NumPy array to Weights. Is it using a data type supported by TensorRT?"); + PY_ASSERT_VALUE_ERROR( + arr, "Could not convert NumPy array to Weights. Is it using a data type supported by TensorRT?"); PY_ASSERT_VALUE_ERROR((arr.flags() & py::array::c_style), "Could not convert non-contiguous NumPy array to Weights. 
Please use numpy.ascontiguousarray() to fix this."); return new Weights{utils::type(arr.dtype()), arr.data(), arr.size()}; @@ -105,8 +105,8 @@ static const auto dims_getter = [](Dims const& self, int32_t const pyIndex) -> i static const auto dims_getter_slice = [](Dims const& self, py::slice slice) { size_t start, stop, step, slicelength; - PY_ASSERT_VALUE_ERROR(slice.compute(self.nbDims, &start, &stop, &step, &slicelength), - "Incorrect getter slice dims"); + PY_ASSERT_VALUE_ERROR( + slice.compute(self.nbDims, &start, &stop, &step, &slicelength), "Incorrect getter slice dims"); // Disallow out-of-bounds things. PY_ASSERT_INDEX_ERROR(stop <= self.nbDims); @@ -124,8 +124,8 @@ static const auto dims_setter = [](Dims& self, int32_t const pyIndex, int64_t co static const auto dims_setter_slice = [](Dims& self, py::slice slice, Dims const& other) { size_t start, stop, step, slicelength; - PY_ASSERT_VALUE_ERROR(slice.compute(self.nbDims, &start, &stop, &step, &slicelength), - "Incorrect setter slice dims"); + PY_ASSERT_VALUE_ERROR( + slice.compute(self.nbDims, &start, &stop, &step, &slicelength), "Incorrect setter slice dims"); // Disallow out-of-bounds things. PY_ASSERT_INDEX_ERROR(stop < self.nbDims); diff --git a/python/src/infer/pyGraph.cpp b/python/src/infer/pyGraph.cpp index 730481ae..ddca1e9d 100644 --- a/python/src/infer/pyGraph.cpp +++ b/python/src/infer/pyGraph.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/python/src/infer/pyInt8.cpp b/python/src/infer/pyInt8.cpp index 5639bcd1..9052f796 100644 --- a/python/src/infer/pyInt8.cpp +++ b/python/src/infer/pyInt8.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -82,7 +82,8 @@ class pyCalibratorTrampoline : public Derived { py::gil_scoped_acquire gil{}; - py::function pyReadCalibrationCache = utils::getOverride(static_cast(this), "read_calibration_cache"); + py::function pyReadCalibrationCache + = utils::getOverride(static_cast(this), "read_calibration_cache"); // Cannot cast `None` to py::buffer. auto cacheRaw = pyReadCalibrationCache(); @@ -118,7 +119,7 @@ class pyCalibratorTrampoline : public Derived py::function pyWriteCalibrationCache = utils::getOverride(static_cast(this), "write_calibration_cache"); - #if PYBIND11_VERSION_MAJOR < 2 || PYBIND11_VERSION_MAJOR == 2 && PYBIND11_VERSION_MINOR < 6 +#if PYBIND11_VERSION_MAJOR < 2 || PYBIND11_VERSION_MAJOR == 2 && PYBIND11_VERSION_MINOR < 6 py::buffer_info info{ const_cast(ptr), /* Pointer to buffer */ sizeof(uint8_t), /* Size of one scalar */ @@ -128,10 +129,10 @@ class pyCalibratorTrampoline : public Derived { sizeof(uint8_t) } /* Strides (in bytes) for each index */ }; py::memoryview cache{info}; - #else +#else py::memoryview cache{ py::memoryview::from_buffer(static_cast(ptr), {length}, {sizeof(uint8_t)})}; - #endif +#endif pyWriteCalibrationCache(cache); } catch (std::exception const& e) @@ -284,7 +285,10 @@ void bindInt8(py::module& m) py::class_(m, "IInt8Calibrator", IInt8CalibratorDoc::descr, py::module_local()) .def(py::init<>()) - .def("get_batch_size", 
utils::deprecateMember(&IInt8Calibrator::getBatchSize, "Implicit batch dimensions support has been removed"), IInt8CalibratorDoc::get_batch_size) + .def("get_batch_size", + utils::deprecateMember( + &IInt8Calibrator::getBatchSize, "Implicit batch dimensions support has been removed"), + IInt8CalibratorDoc::get_batch_size) .def("get_algorithm", &IInt8Calibrator::getAlgorithm, IInt8CalibratorDoc::get_algorithm) // For documentation purposes only .def("get_batch", docGetBatch, "names"_a, IInt8CalibratorDoc::get_batch) @@ -296,7 +300,10 @@ void bindInt8(py::module& m) py::class_( m, "IInt8LegacyCalibrator", IInt8LegacyCalibratorDoc::descr, py::module_local()) .def(py::init<>()) - .def("get_batch_size", utils::deprecateMember(&IInt8LegacyCalibrator::getBatchSize, "Implicit batch dimensions support has been removed"), IInt8CalibratorDoc::get_batch_size) + .def("get_batch_size", + utils::deprecateMember( + &IInt8LegacyCalibrator::getBatchSize, "Implicit batch dimensions support has been removed"), + IInt8CalibratorDoc::get_batch_size) .def("get_algorithm", &IInt8LegacyCalibrator::getAlgorithm, IInt8LegacyCalibratorDoc::get_algorithm) // For documentation purposes only .def("get_batch", docGetBatch, "names"_a, IInt8CalibratorDoc::get_batch) @@ -308,7 +315,10 @@ void bindInt8(py::module& m) py::class_>( m, "IInt8EntropyCalibrator", IInt8EntropyCalibratorDoc::descr, py::module_local()) .def(py::init<>()) - .def("get_batch_size", utils::deprecateMember(&IInt8EntropyCalibrator::getBatchSize, "Implicit batch dimensions support has been removed"), IInt8CalibratorDoc::get_batch_size) + .def("get_batch_size", + utils::deprecateMember( + &IInt8EntropyCalibrator::getBatchSize, "Implicit batch dimensions support has been removed"), + IInt8CalibratorDoc::get_batch_size) .def("get_algorithm", &IInt8EntropyCalibrator::getAlgorithm, IInt8EntropyCalibratorDoc::get_algorithm) // For documentation purposes only .def("get_batch", docGetBatch, "names"_a, IInt8CalibratorDoc::get_batch) @@ 
-320,7 +330,10 @@ void bindInt8(py::module& m) py::class_>( m, "IInt8EntropyCalibrator2", IInt8EntropyCalibrator2Doc::descr, py::module_local()) .def(py::init<>()) - .def("get_batch_size", utils::deprecateMember(&IInt8EntropyCalibrator2::getBatchSize, "Implicit batch dimensions support has been removed"), IInt8CalibratorDoc::get_batch_size) + .def("get_batch_size", + utils::deprecateMember( + &IInt8EntropyCalibrator2::getBatchSize, "Implicit batch dimensions support has been removed"), + IInt8CalibratorDoc::get_batch_size) .def("get_algorithm", &IInt8EntropyCalibrator2::getAlgorithm, IInt8EntropyCalibrator2Doc::get_algorithm) // For documentation purposes only .def("get_batch", docGetBatch, "names"_a, IInt8CalibratorDoc::get_batch) @@ -332,7 +345,10 @@ void bindInt8(py::module& m) py::class_>( m, "IInt8MinMaxCalibrator", IInt8MinMaxCalibratorDoc::descr, py::module_local()) .def(py::init<>()) - .def("get_batch_size", utils::deprecateMember(&IInt8MinMaxCalibrator::getBatchSize, "Implicit batch dimensions support has been removed"), IInt8CalibratorDoc::get_batch_size) + .def("get_batch_size", + utils::deprecateMember( + &IInt8MinMaxCalibrator::getBatchSize, "Implicit batch dimensions support has been removed"), + IInt8CalibratorDoc::get_batch_size) .def("get_algorithm", &IInt8MinMaxCalibrator::getAlgorithm, IInt8MinMaxCalibratorDoc::get_algorithm) // For documentation purposes only .def("get_batch", docGetBatch, "names"_a, IInt8CalibratorDoc::get_batch) diff --git a/python/src/infer/pyPlugin.cpp b/python/src/infer/pyPlugin.cpp index d87a42ec..9fc1b901 100644 --- a/python/src/infer/pyPlugin.cpp +++ b/python/src/infer/pyPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -87,14 +87,14 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt public: using PyIPluginV2DynamicExt::PyIPluginV2DynamicExt; PyIPluginV2DynamicExtImpl() = default; - PyIPluginV2DynamicExtImpl(const PyIPluginV2DynamicExt& a) {}; + PyIPluginV2DynamicExtImpl(const PyIPluginV2DynamicExt& a){}; int32_t getNbOutputs() const noexcept override { try { py::gil_scoped_acquire gil{}; - if(!mIsNbOutputsInitialized) + if (!mIsNbOutputsInitialized) { utils::throwPyError(PyExc_AttributeError, "num_outputs not initialized"); } @@ -104,7 +104,8 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt return -1; } - bool supportsFormatCombination(int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override + bool supportsFormatCombination( + int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override { try { @@ -118,7 +119,7 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt } std::vector inOutVector; - for(int32_t idx = 0; idx < nbInputs + nbOutputs; ++idx) + for (int32_t idx = 0; idx < nbInputs + nbOutputs; ++idx) { inOutVector.push_back(*(inOut + idx)); } @@ -151,10 +152,11 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt return 0; } - try{ + try + { py::object pyResult = pyInitialize(); } - catch (py::error_already_set &e) + catch (py::error_already_set& e) { std::cerr << "[ERROR] Exception thrown from initialize() " << e.what() << std::endl; return -1; @@ -165,7 +167,8 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt return -1; } - void terminate() noexcept override { + void terminate() noexcept override + { try { py::gil_scoped_acquire gil{}; @@ -173,7 +176,7 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt py::function pyTerminate = py::get_override(static_cast(this), "terminate"); // if no implementation 
is provided for terminate(), it is defaulted to `pass` - if(pyTerminate) + if (pyTerminate) { pyTerminate(); } @@ -181,7 +184,8 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt PLUGIN_API_CATCH("terminate") } - int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override + int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, void const* const* inputs, + void* const* outputs, void* workspace, cudaStream_t stream) noexcept override { try { @@ -194,12 +198,12 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt } std::vector inVector; - for(int32_t idx = 0; idx < mNbInputs; ++idx) + for (int32_t idx = 0; idx < mNbInputs; ++idx) { inVector.push_back(*(inputDesc + idx)); } std::vector outVector; - for(int32_t idx = 0; idx < mNbOutputs; ++idx) + for (int32_t idx = 0; idx < mNbOutputs; ++idx) { outVector.push_back(*(outputDesc + idx)); } @@ -218,10 +222,11 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt intptr_t workspacePtr = reinterpret_cast(workspace); intptr_t cudaStreamPtr = reinterpret_cast(stream); - try{ + try + { pyEnqueue(inVector, outVector, inPtrs, outPtrs, workspacePtr, cudaStreamPtr); } - catch (py::error_already_set &e) + catch (py::error_already_set& e) { std::cerr << "[ERROR] Exception thrown from enqueue() " << e.what() << std::endl; return -1; @@ -283,8 +288,7 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt { py::gil_scoped_acquire gil{}; - py::function pySerialize - = utils::getOverride(static_cast(this), "serialize"); + py::function pySerialize = utils::getOverride(static_cast(this), "serialize"); if (!pySerialize) { utils::throwPyError(PyExc_RuntimeError, "no implementation provided for serialize()"); @@ -307,7 +311,7 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt try { py::gil_scoped_acquire gil{}; - 
if(!mIsPluginTypeInitialized) + if (!mIsPluginTypeInitialized) { utils::throwPyError(PyExc_AttributeError, "plugin_type not initialized"); } @@ -322,7 +326,7 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt try { py::gil_scoped_acquire gil{}; - if(!mIsPluginVersionInitialized) + if (!mIsPluginVersionInitialized) { utils::throwPyError(PyExc_AttributeError, "plugin_version not initialized"); } @@ -374,7 +378,6 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt // Remove reference to the Python plugin object so that it could be garbage-collected pyObjVec[this].dec_ref(); - } PLUGIN_API_CATCH("destroy") } @@ -393,7 +396,7 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt { py::gil_scoped_acquire gil{}; // getPluginNamespace() is not passed through to the Python side - if(!mIsNamespaceInitialized) + if (!mIsNamespaceInitialized) { utils::throwPyError(PyExc_AttributeError, "plugin_namespace not initialized"); } @@ -417,7 +420,7 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt } std::vector inVector; - for(int32_t idx = 0; idx < nbInputs; ++idx) + for (int32_t idx = 0; idx < nbInputs; ++idx) { inVector.push_back(*(inputTypes + idx)); } @@ -436,8 +439,8 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt return DataType{}; } - - DimsExprs getOutputDimensions(int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept override + DimsExprs getOutputDimensions( + int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept override { try { @@ -451,7 +454,7 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt } std::vector inVector; - for(int32_t idx = 0; idx < nbInputs; ++idx) + for (int32_t idx = 0; idx < nbInputs; ++idx) { inVector.push_back(*(inputs + idx)); } @@ -470,7 +473,8 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt return DimsExprs{}; } - void 
configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept override + void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, + int32_t nbOutputs) noexcept override { try { @@ -486,13 +490,13 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt } std::vector inVector; - for(int32_t idx = 0; idx < nbInputs; ++idx) + for (int32_t idx = 0; idx < nbInputs; ++idx) { inVector.push_back(*(in + idx)); } std::vector outVector; - for(int32_t idx = 0; idx < nbOutputs; ++idx) + for (int32_t idx = 0; idx < nbOutputs; ++idx) { outVector.push_back(*(out + idx)); } @@ -502,13 +506,15 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt PLUGIN_API_CATCH("configure_plugin") } - size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override + size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, + int32_t nbOutputs) const noexcept override { try { py::gil_scoped_acquire gil{}; - py::function pyGetWorkspaceSize = py::get_override(static_cast(this), "get_workspace_size"); + py::function pyGetWorkspaceSize + = py::get_override(static_cast(this), "get_workspace_size"); if (!pyGetWorkspaceSize) { @@ -517,13 +523,13 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt } std::vector inVector; - for(int32_t idx = 0; idx < nbInputs; ++idx) + for (int32_t idx = 0; idx < nbInputs; ++idx) { inVector.push_back(*(inputs + idx)); } std::vector outVector; - for(int32_t idx = 0; idx < nbOutputs; ++idx) + for (int32_t idx = 0; idx < nbOutputs; ++idx) { outVector.push_back(*(outputs + idx)); } @@ -559,23 +565,24 @@ class PyIPluginV2DynamicExtImpl : public PyIPluginV2DynamicExt mPluginVersion = std::move(pluginVersion); mIsPluginVersionInitialized = true; } - private: - int32_t getTensorRTVersion() 
const noexcept override - { + +private: + int32_t getTensorRTVersion() const noexcept override + { return static_cast((static_cast(PluginVersion::kV2_DYNAMICEXT_PYTHON) << 24U) | (static_cast(NV_TENSORRT_VERSION) & 0xFFFFFFU)); - } + } - int32_t mNbInputs{}; - int32_t mNbOutputs{}; - std::string mNamespace; - std::string mPluginType; - std::string mPluginVersion; + int32_t mNbInputs{}; + int32_t mNbOutputs{}; + std::string mNamespace; + std::string mPluginType; + std::string mPluginVersion; - bool mIsNbOutputsInitialized{false}; - bool mIsNamespaceInitialized{false}; - bool mIsPluginTypeInitialized{false}; - bool mIsPluginVersionInitialized{false}; + bool mIsNbOutputsInitialized{false}; + bool mIsNamespaceInitialized{false}; + bool mIsPluginTypeInitialized{false}; + bool mIsPluginVersionInitialized{false}; }; class IPluginCreatorImpl : public IPluginCreator @@ -593,7 +600,7 @@ class IPluginCreatorImpl : public IPluginCreator try { py::gil_scoped_acquire gil{}; - if(!mIsNameInitialized) + if (!mIsNameInitialized) { utils::throwPyError(PyExc_AttributeError, "name not initialized"); } @@ -608,7 +615,7 @@ class IPluginCreatorImpl : public IPluginCreator try { py::gil_scoped_acquire gil{}; - if(!mIsPluginVersionInitialized) + if (!mIsPluginVersionInitialized) { utils::throwPyError(PyExc_AttributeError, "plugin_version not initialized"); } @@ -623,7 +630,7 @@ class IPluginCreatorImpl : public IPluginCreator try { py::gil_scoped_acquire gil{}; - if(!mIsFCInitialized) + if (!mIsFCInitialized) { utils::throwPyError(PyExc_AttributeError, "field_names not initialized"); } @@ -661,8 +668,7 @@ class IPluginCreatorImpl : public IPluginCreator return nullptr; } - IPluginV2* deserializePlugin( - const char* name, const void* serialData, size_t serialLength) noexcept override + IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override { try { @@ -677,7 +683,9 @@ class IPluginCreatorImpl : public IPluginCreator std::string 
nameString{name}; - py::handle handle = pyDeserializePlugin(nameString, py::bytes(static_cast(serialData), serialLength)).release(); + py::handle handle + = pyDeserializePlugin(nameString, py::bytes(static_cast(serialData), serialLength)) + .release(); try { auto result = handle.cast(); @@ -703,7 +711,7 @@ class IPluginCreatorImpl : public IPluginCreator try { py::gil_scoped_acquire gil{}; - if(!mIsNamespaceInitialized) + if (!mIsNamespaceInitialized) { utils::throwPyError(PyExc_AttributeError, "plugin_namespace not initialized"); } @@ -1755,9 +1763,10 @@ bool isPython(IVersionedInterface const& versionedInterface) namespace lambdas { // For IPluginV2 -static const auto IPluginV2_get_output_shape = [](IPluginV2& self, int32_t const index, std::vector const& inputShapes) { - return self.getOutputDimensions(index, inputShapes.data(), inputShapes.size()); -}; +static const auto IPluginV2_get_output_shape + = [](IPluginV2& self, int32_t const index, std::vector const& inputShapes) { + return self.getOutputDimensions(index, inputShapes.data(), inputShapes.size()); + }; static const auto IPluginV2_configure_with_format = [](IPluginV2& self, std::vector const& inputShapes, std::vector const& outputShapes, DataType dtype, @@ -1789,13 +1798,14 @@ static const auto IPluginV2_serialize = [](IPluginV2& self) { }; // `const vector::data()` corresponds to `const void* const*` (pointer to const-pointer to const void) -static const auto IPluginV2_execute_async = [](IPluginV2& self, int32_t batchSize, const std::vector& inputs, - std::vector& outputs, void* workspace, long stream) { +static const auto IPluginV2_execute_async = [](IPluginV2& self, int32_t batchSize, + const std::vector& inputs, std::vector& outputs, + void* workspace, long stream) { return self.enqueue(batchSize, inputs.data(), outputs.data(), workspace, reinterpret_cast(stream)); }; static const auto IPluginV2_set_num_outputs = [](IPluginV2& self, int32_t numOutputs) { - 
if(getPluginVersion(self.getTensorRTVersion()) == PluginVersion::kV2_DYNAMICEXT_PYTHON) + if (getPluginVersion(self.getTensorRTVersion()) == PluginVersion::kV2_DYNAMICEXT_PYTHON) { auto plugin = static_cast(&self); plugin->setNbOutputs(numOutputs); @@ -1805,7 +1815,7 @@ static const auto IPluginV2_set_num_outputs = [](IPluginV2& self, int32_t numOut }; static const auto IPluginV2_set_plugin_type = [](IPluginV2& self, std::string pluginType) { - if(getPluginVersion(self.getTensorRTVersion()) == PluginVersion::kV2_DYNAMICEXT_PYTHON) + if (getPluginVersion(self.getTensorRTVersion()) == PluginVersion::kV2_DYNAMICEXT_PYTHON) { auto plugin = reinterpret_cast(&self); plugin->setPluginType(std::move(pluginType)); @@ -1815,7 +1825,7 @@ static const auto IPluginV2_set_plugin_type = [](IPluginV2& self, std::string pl }; static const auto IPluginV2_set_plugin_version = [](IPluginV2& self, std::string pluginVersion) { - if(getPluginVersion(self.getTensorRTVersion()) == PluginVersion::kV2_DYNAMICEXT_PYTHON) + if (getPluginVersion(self.getTensorRTVersion()) == PluginVersion::kV2_DYNAMICEXT_PYTHON) { auto plugin = reinterpret_cast(&self); plugin->setPluginVersion(std::move(pluginVersion)); @@ -1841,8 +1851,8 @@ static std::unique_ptr makeBoolArray(std::vector const& v) static const auto configure_plugin = [](IPluginV2Ext& self, std::vector const& inputShapes, std::vector const& outputShapes, std::vector const& inputTypes, std::vector const& outputTypes, - std::vector const& inputIsBroadcasted, std::vector const& outputIsBroadcasted, TensorFormat format, - int32_t maxBatchSize) { + std::vector const& inputIsBroadcasted, std::vector const& outputIsBroadcasted, + TensorFormat format, int32_t maxBatchSize) { auto inputBroadcast = makeBoolArray(inputIsBroadcasted); auto outputBroadcast = makeBoolArray(outputIsBroadcasted); return self.configurePlugin(inputShapes.data(), inputShapes.size(), outputShapes.data(), outputShapes.size(), @@ -1984,7 +1994,7 @@ static const auto 
dimsexprs_vector_constructor = [](std::vector(Dims::MAX_DIMS)}; PY_ASSERT_VALUE_ERROR(in.size() <= maxDims, - "Input length " + std::to_string(in.size()) + ". Max expected length is " + std::to_string(maxDims)); + "Input length " + std::to_string(in.size()) + ". Max expected length is " + std::to_string(maxDims)); // Create the Dims object. DimsExprs* self = new DimsExprs{}; @@ -2300,6 +2310,80 @@ void bindPlugin(py::module& m) .def_readwrite("opt", &DynamicPluginTensorDesc::opt) .def_readwrite("max", &DynamicPluginTensorDesc::max); + py::enum_(m, "PluginFieldType", PluginFieldTypeDoc::descr, py::module_local()) + .value("FLOAT16", PluginFieldType::kFLOAT16) + .value("FLOAT32", PluginFieldType::kFLOAT32) + .value("FLOAT64", PluginFieldType::kFLOAT64) + .value("INT8", PluginFieldType::kINT8) + .value("INT16", PluginFieldType::kINT16) + .value("INT32", PluginFieldType::kINT32) + .value("CHAR", PluginFieldType::kCHAR) + .value("DIMS", PluginFieldType::kDIMS) + .value("UNKNOWN", PluginFieldType::kUNKNOWN) + .value("BF16", PluginFieldType::kBF16) + .value("INT64", PluginFieldType::kINT64) + .value("FP8", PluginFieldType::kFP8); + + py::class_(m, "PluginField", PluginFieldDoc::descr, py::module_local()) + .def(py::init(lambdas::plugin_field_default_constructor), "name"_a = "", py::keep_alive<1, 2>{}) + .def(py::init(lambdas::plugin_field_constructor), "name"_a, "data"_a, + "type"_a = nvinfer1::PluginFieldType::kUNKNOWN, py::keep_alive<1, 2>{}, py::keep_alive<1, 3>{}) + .def_property( + "name", [](PluginField& self) { return self.name; }, + py::cpp_function( + [](PluginField& self, FallbackString& name) { self.name = name.c_str(); }, py::keep_alive<1, 2>{})) + .def_property( + "data", + [](PluginField& self) { + switch (self.type) + { + case PluginFieldType::kINT32: + return py::array(self.length, static_cast(self.data)); + break; + case PluginFieldType::kINT8: + return py::array(self.length, static_cast(self.data)); + break; + case PluginFieldType::kINT16: + return 
py::array(self.length, static_cast(self.data)); + break; + case PluginFieldType::kFLOAT16: + // TODO: Figure out how to handle float16 correctly here + return py::array(self.length, static_cast(self.data)); + break; + case PluginFieldType::kFLOAT32: + return py::array(self.length, static_cast(self.data)); + break; + case PluginFieldType::kFLOAT64: + return py::array(self.length, static_cast(self.data)); + break; + case PluginFieldType::kCHAR: return py::array(self.length, static_cast(self.data)); break; + default: assert(false && "No known conversion for returning data from PluginField"); break; + } + // should not reach this line + return py::array(); + }, + py::cpp_function( + [](PluginField& self, py::buffer& buffer) { + py::buffer_info info = buffer.request(); + self.data = info.ptr; + }, + py::keep_alive<1, 2>{})) + .def_readwrite("type", &PluginField::type) + .def_readwrite("size", &PluginField::length); + + // PluginFieldCollection behaves like an iterable, and can be constructed from iterables. + py::class_(m, "PluginFieldCollection_", PluginFieldCollectionDoc::descr, py::module_local()) + .def(py::init<>(lambdas::plugin_field_collection_constructor), py::keep_alive<1, 2>{}) + .def("__len__", [](PluginFieldCollection& self) { return self.nbFields; }) + .def("__getitem__", [](PluginFieldCollection& self, int32_t const index) { + PY_ASSERT_INDEX_ERROR(index < self.nbFields); + return self.fields[index]; + }); + + // Creating a trt.PluginFieldCollection in Python will actually construct a vector, + // which can then be converted to an actual C++ PluginFieldCollection. 
+ py::implicitly_convertible, PluginFieldCollection>(); + py::class_(m, "IPluginV2", IPluginV2Doc::descr, py::module_local()) .def_property("num_outputs", &IPluginV2::getNbOutputs, lambdas::IPluginV2_set_num_outputs) .def_property_readonly("tensorrt_version", &IPluginV2::getTensorRTVersion) @@ -2337,7 +2421,8 @@ void bindPlugin(py::module& m) .def("clone", &IPluginV2Ext::clone, IPluginV2ExtDoc::clone); ; - py::class_>(m, "IPluginV2DynamicExtBase", py::module_local()); + py::class_>( + m, "IPluginV2DynamicExtBase", py::module_local()); py::class_>( @@ -2366,6 +2451,9 @@ void bindPlugin(py::module& m) "stream"_a, IPluginV2DynamicExtDoc::enqueue) .def("clone", &pluginDoc::clone, IPluginV2DynamicExtDoc::clone); + py::class_>( + m, "IPluginCapability", IPluginV3Doc::iplugincapability_descr, py::module_local()); + py::class_>( m, "IPluginV3", IPluginV3Doc::ipluginv3_descr, py::module_local()) .def(py::init<>()) @@ -2375,9 +2463,6 @@ void bindPlugin(py::module& m) .def("clone", &pluginDoc::cloneV3, IPluginV3Doc::clone) .def("destroy", &pluginDoc::destroyV3, IPluginV3Doc::destroy); - py::class_>( - m, "IPluginCapability", IPluginV3Doc::iplugincapability_descr, py::module_local()); - py::class_>( m, "IPluginV3OneCore", IPluginV3Doc::ipluginv3onecore_descr, py::module_local()) @@ -2430,80 +2515,6 @@ void bindPlugin(py::module& m) "stream"_a, IPluginV3Doc::enqueue) .def("attach_to_context", &pluginDoc::attachToContext, "resource_context"_a, IPluginV3Doc::attach_to_context); - py::enum_(m, "PluginFieldType", PluginFieldTypeDoc::descr, py::module_local()) - .value("FLOAT16", PluginFieldType::kFLOAT16) - .value("FLOAT32", PluginFieldType::kFLOAT32) - .value("FLOAT64", PluginFieldType::kFLOAT64) - .value("INT8", PluginFieldType::kINT8) - .value("INT16", PluginFieldType::kINT16) - .value("INT32", PluginFieldType::kINT32) - .value("CHAR", PluginFieldType::kCHAR) - .value("DIMS", PluginFieldType::kDIMS) - .value("UNKNOWN", PluginFieldType::kUNKNOWN) - .value("BF16", 
PluginFieldType::kBF16) - .value("INT64", PluginFieldType::kINT64) - .value("FP8", PluginFieldType::kFP8); - - py::class_(m, "PluginField", PluginFieldDoc::descr, py::module_local()) - .def(py::init(lambdas::plugin_field_default_constructor), "name"_a = "", py::keep_alive<1, 2>{}) - .def(py::init(lambdas::plugin_field_constructor), "name"_a, "data"_a, - "type"_a = nvinfer1::PluginFieldType::kUNKNOWN, py::keep_alive<1, 2>{}, py::keep_alive<1, 3>{}) - .def_property( - "name", [](PluginField& self) { return self.name; }, - py::cpp_function( - [](PluginField& self, FallbackString& name) { self.name = name.c_str(); }, py::keep_alive<1, 2>{})) - .def_property( - "data", - [](PluginField& self) { - switch (self.type) - { - case PluginFieldType::kINT32: - return py::array(self.length, static_cast(self.data)); - break; - case PluginFieldType::kINT8: - return py::array(self.length, static_cast(self.data)); - break; - case PluginFieldType::kINT16: - return py::array(self.length, static_cast(self.data)); - break; - case PluginFieldType::kFLOAT16: - // TODO: Figure out how to handle float16 correctly here - return py::array(self.length, static_cast(self.data)); - break; - case PluginFieldType::kFLOAT32: - return py::array(self.length, static_cast(self.data)); - break; - case PluginFieldType::kFLOAT64: - return py::array(self.length, static_cast(self.data)); - break; - case PluginFieldType::kCHAR: return py::array(self.length, static_cast(self.data)); break; - default: assert(false && "No known conversion for returning data from PluginField"); break; - } - // should not reach this line - return py::array(); - }, - py::cpp_function( - [](PluginField& self, py::buffer& buffer) { - py::buffer_info info = buffer.request(); - self.data = info.ptr; - }, - py::keep_alive<1, 2>{})) - .def_readwrite("type", &PluginField::type) - .def_readwrite("size", &PluginField::length); - - // PluginFieldCollection behaves like an iterable, and can be constructed from iterables. 
- py::class_(m, "PluginFieldCollection_", PluginFieldCollectionDoc::descr, py::module_local()) - .def(py::init<>(lambdas::plugin_field_collection_constructor), py::keep_alive<1, 2>{}) - .def("__len__", [](PluginFieldCollection& self) { return self.nbFields; }) - .def("__getitem__", [](PluginFieldCollection& self, int32_t const index) { - PY_ASSERT_INDEX_ERROR(index < self.nbFields); - return self.fields[index]; - }); - - // Creating a trt.PluginFieldCollection in Python will actually construct a vector, - // which can then be converted to an actual C++ PluginFieldCollection. - py::implicitly_convertible, PluginFieldCollection>(); - py::class_( m, "IPluginCreatorInterface", IPluginCreatorInterfaceDoc::descr, py::module_local()); diff --git a/python/src/parsers/pyOnnx.cpp b/python/src/parsers/pyOnnx.cpp index 122fc219..9059a3b7 100644 --- a/python/src/parsers/pyOnnx.cpp +++ b/python/src/parsers/pyOnnx.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/python/src/pyTensorRT.cpp b/python/src/pyTensorRT.cpp index a7fe0017..c562703a 100644 --- a/python/src/pyTensorRT.cpp +++ b/python/src/pyTensorRT.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/python/src/utils.cpp b/python/src/utils.cpp index 46e8b3ba..de601542 100644 --- a/python/src/utils.cpp +++ b/python/src/utils.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -44,7 +44,7 @@ size_t size(nvinfer1::DataType type) case nvinfer1::DataType::kUINT8: return 1; case nvinfer1::DataType::kFP8: return 1; case nvinfer1::DataType::kBF16: return 2; - case nvinfer1::DataType::kINT4: break; //TRT-22011 - need to address sub-byte element size + case nvinfer1::DataType::kINT4: break; // TRT-22011 - need to address sub-byte element size } return -1; } diff --git a/quickstart/IntroNotebooks/Additional Examples/helper.py b/quickstart/IntroNotebooks/Additional Examples/helper.py index 66c4e006..c00ed985 100644 --- a/quickstart/IntroNotebooks/Additional Examples/helper.py +++ b/quickstart/IntroNotebooks/Additional Examples/helper.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/IntroNotebooks/helper.py b/quickstart/IntroNotebooks/helper.py index 66c4e006..c00ed985 100644 --- a/quickstart/IntroNotebooks/helper.py +++ b/quickstart/IntroNotebooks/helper.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/IntroNotebooks/onnx_helper.py b/quickstart/IntroNotebooks/onnx_helper.py index 6bea97dd..2f3d6767 100644 --- a/quickstart/IntroNotebooks/onnx_helper.py +++ b/quickstart/IntroNotebooks/onnx_helper.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/Makefile b/quickstart/Makefile index bf728ff4..1e700e3d 100644 --- a/quickstart/Makefile +++ b/quickstart/Makefile @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/Makefile.config b/quickstart/Makefile.config index d81f325d..0d290ea5 100644 --- a/quickstart/Makefile.config +++ b/quickstart/Makefile.config @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/SemanticSegmentation/Makefile b/quickstart/SemanticSegmentation/Makefile index 5c1bdea3..3c1f68d0 100644 --- a/quickstart/SemanticSegmentation/Makefile +++ b/quickstart/SemanticSegmentation/Makefile @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/SemanticSegmentation/export.py b/quickstart/SemanticSegmentation/export.py index e5168aaa..560e233e 100644 --- a/quickstart/SemanticSegmentation/export.py +++ b/quickstart/SemanticSegmentation/export.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/SemanticSegmentation/tutorial-runtime.cpp b/quickstart/SemanticSegmentation/tutorial-runtime.cpp index 7f0854a3..c1f09197 100644 --- a/quickstart/SemanticSegmentation/tutorial-runtime.cpp +++ b/quickstart/SemanticSegmentation/tutorial-runtime.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/common/logger.cpp b/quickstart/common/logger.cpp index 2eaccd54..9d07754c 100644 --- a/quickstart/common/logger.cpp +++ b/quickstart/common/logger.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/common/logger.h b/quickstart/common/logger.h index 513275c2..35cbf367 100644 --- a/quickstart/common/logger.h +++ b/quickstart/common/logger.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/common/logging.h b/quickstart/common/logging.h index f323d22b..d891e168 100644 --- a/quickstart/common/logging.h +++ b/quickstart/common/logging.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/common/util.cpp b/quickstart/common/util.cpp index 717b63aa..55ccd630 100644 --- a/quickstart/common/util.cpp +++ b/quickstart/common/util.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/common/util.h b/quickstart/common/util.h index 50455e97..55457969 100644 --- a/quickstart/common/util.h +++ b/quickstart/common/util.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/deploy_to_triton/config.pbtxt b/quickstart/deploy_to_triton/config.pbtxt index 63046c8d..f65a9c55 100644 --- a/quickstart/deploy_to_triton/config.pbtxt +++ b/quickstart/deploy_to_triton/config.pbtxt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/deploy_to_triton/export_resnet_to_onnx.py b/quickstart/deploy_to_triton/export_resnet_to_onnx.py index fba1550a..64d6b137 100644 --- a/quickstart/deploy_to_triton/export_resnet_to_onnx.py +++ b/quickstart/deploy_to_triton/export_resnet_to_onnx.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/quickstart/deploy_to_triton/triton_client.py b/quickstart/deploy_to_triton/triton_client.py index a6e7553d..1575e208 100644 --- a/quickstart/deploy_to_triton/triton_client.py +++ b/quickstart/deploy_to_triton/triton_client.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 1c26cc38..513810d9 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/CMakeSamplesTemplate.txt b/samples/CMakeSamplesTemplate.txt index d4f78ae5..285e3f99 100644 --- a/samples/CMakeSamplesTemplate.txt +++ b/samples/CMakeSamplesTemplate.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -62,11 +62,11 @@ add_executable(${TARGET_NAME} set(DEPS_LIST "") if(BUILD_PLUGINS) - list(APPEND DEPS_LIST nvinfer_plugin) + list(APPEND DEPS_LIST ${nvinfer_plugin_lib_name}) endif() if(BUILD_PARSERS) - list(APPEND DEPS_LIST nvonnxparser) + list(APPEND DEPS_LIST ${nvonnxparser_lib_name}) endif() if(BUILD_PLUGINS OR BUILD_PARSERS) @@ -93,7 +93,7 @@ target_compile_options(${TARGET_NAME} PUBLIC set(SAMPLE_DEP_LIBS ${CUDART_LIB} - ${nvinfer_LIB_PATH} + ${${nvinfer_lib_name}_LIB_PATH} ${RT_LIB} ${CMAKE_DL_LIBS} ${CMAKE_THREAD_LIBS_INIT} @@ -104,17 +104,17 @@ if (NOT MSVC) endif() if(${PLUGINS_NEEDED}) - list(APPEND SAMPLE_DEP_LIBS nvinfer_plugin) + list(APPEND SAMPLE_DEP_LIBS ${nvinfer_plugin_lib_name}) endif() if("onnx" IN_LIST SAMPLE_PARSERS) - list(APPEND SAMPLE_DEP_LIBS nvonnxparser) + list(APPEND SAMPLE_DEP_LIBS ${nvonnxparser_lib_name}) endif() -# Necessary to link nvinfer_plugin library. +# Necessary to link nvinfer_plugin library. Add unresolved symbols flag for non-Windows platforms. target_link_libraries(${TARGET_NAME} ${SAMPLE_DEP_LIBS} - -Wl,--unresolved-symbols=ignore-in-shared-libs + $<$>:-Wl,--unresolved-symbols=ignore-in-shared-libs> ) set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL") diff --git a/samples/common/BatchStream.h b/samples/common/BatchStream.h index f6da8d70..c4ab9de0 100644 --- a/samples/common/BatchStream.h +++ b/samples/common/BatchStream.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/EntropyCalibrator.h b/samples/common/EntropyCalibrator.h index 936d10e0..67a0130e 100644 --- a/samples/common/EntropyCalibrator.h +++ b/samples/common/EntropyCalibrator.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/ErrorRecorder.h b/samples/common/ErrorRecorder.h index cd00f745..bfb857c5 100644 --- a/samples/common/ErrorRecorder.h +++ b/samples/common/ErrorRecorder.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,7 +17,7 @@ #ifndef ERROR_RECORDER_H #define ERROR_RECORDER_H -#include "NvInferRuntimeBase.h" +#include "NvInferRuntime.h" #include "logger.h" #include #include diff --git a/samples/common/argsParser.h b/samples/common/argsParser.h index 745070d9..b302dc47 100644 --- a/samples/common/argsParser.h +++ b/samples/common/argsParser.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -68,7 +68,7 @@ struct Args std::vector dataDirs; std::string saveEngine; std::string loadEngine; - bool rowMajor{true}; + bool rowOrder{true}; }; //! 
@@ -85,7 +85,7 @@ inline bool parseArgs(Args& args, int32_t argc, char* argv[]) int32_t arg; static struct option long_options[] = {{"help", no_argument, 0, 'h'}, {"datadir", required_argument, 0, 'd'}, {"int8", no_argument, 0, 'i'}, {"fp16", no_argument, 0, 'f'}, {"bf16", no_argument, 0, 'z'}, - {"columnMajor", no_argument, 0, 'c'}, {"saveEngine", required_argument, 0, 's'}, + {"columnOrder", no_argument, 0, 'c'}, {"saveEngine", required_argument, 0, 's'}, {"loadEngine", required_argument, 0, 'o'}, {"useDLACore", required_argument, 0, 'u'}, {"batch", required_argument, 0, 'b'}, {nullptr, 0, nullptr, 0}}; int32_t option_index = 0; @@ -124,7 +124,7 @@ inline bool parseArgs(Args& args, int32_t argc, char* argv[]) case 'i': args.runInInt8 = true; break; case 'f': args.runInFp16 = true; break; case 'z': args.runInBf16 = true; break; - case 'c': args.rowMajor = false; break; + case 'c': args.rowOrder = false; break; case 'u': if (optarg) { diff --git a/samples/common/bfloat16.cpp b/samples/common/bfloat16.cpp index a9944789..8222826a 100644 --- a/samples/common/bfloat16.cpp +++ b/samples/common/bfloat16.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/bfloat16.h b/samples/common/bfloat16.h index 90b77421..0d0ab922 100644 --- a/samples/common/bfloat16.h +++ b/samples/common/bfloat16.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/buffers.h b/samples/common/buffers.h index bf40dc9c..e58f2f5c 100644 --- a/samples/common/buffers.h +++ b/samples/common/buffers.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/common.h b/samples/common/common.h index 557bd169..0324d2fb 100644 --- a/samples/common/common.h +++ b/samples/common/common.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/dumpTFWts.py b/samples/common/dumpTFWts.py index 0b7a0123..70770fbd 100644 --- a/samples/common/dumpTFWts.py +++ b/samples/common/dumpTFWts.py @@ -1,6 +1,6 @@ #!/usr/bin/python # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/getOptions.cpp b/samples/common/getOptions.cpp index 8bcf7958..19cd3281 100644 --- a/samples/common/getOptions.cpp +++ b/samples/common/getOptions.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/getOptions.h b/samples/common/getOptions.h index e8460513..4bbf9e27 100644 --- a/samples/common/getOptions.h +++ b/samples/common/getOptions.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/getoptWin.h b/samples/common/getoptWin.h index 7e1cf1ba..a1dc6ffa 100644 --- a/samples/common/getoptWin.h +++ b/samples/common/getoptWin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/half.h b/samples/common/half.h index c5ebdb1a..b997e7db 100644 --- a/samples/common/half.h +++ b/samples/common/half.h @@ -16,7 +16,7 @@ // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/logger.cpp b/samples/common/logger.cpp index 0592db2c..909ec0bb 100644 --- a/samples/common/logger.cpp +++ b/samples/common/logger.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/logger.h b/samples/common/logger.h index ff59bfa9..8205e457 100644 --- a/samples/common/logger.h +++ b/samples/common/logger.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/logging.h b/samples/common/logging.h index e61b3687..d2c571d9 100644 --- a/samples/common/logging.h +++ b/samples/common/logging.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,7 +18,7 @@ #ifndef TENSORRT_LOGGING_H #define TENSORRT_LOGGING_H -#include "NvInferRuntimeBase.h" +#include "NvInferRuntime.h" #include "sampleOptions.h" #include #include diff --git a/samples/common/parserOnnxConfig.h b/samples/common/parserOnnxConfig.h index ed0a9b55..67ee6c71 100644 --- a/samples/common/parserOnnxConfig.h +++ b/samples/common/parserOnnxConfig.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/safeCommon.h b/samples/common/safeCommon.h index fc9f28b0..4cc87a70 100644 --- a/samples/common/safeCommon.h +++ b/samples/common/safeCommon.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,7 +18,7 @@ #ifndef TENSORRT_SAFE_COMMON_H #define TENSORRT_SAFE_COMMON_H -#include "NvInferRuntimeBase.h" +#include "NvInferSafeRuntime.h" #include "cuda_runtime.h" #include "sampleEntrypoints.h" #include diff --git a/samples/common/sampleConfig.h b/samples/common/sampleConfig.h index f60ed363..801a268a 100644 --- a/samples/common/sampleConfig.h +++ b/samples/common/sampleConfig.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/sampleDevice.cpp b/samples/common/sampleDevice.cpp index f504fa69..235ad9f0 100644 --- a/samples/common/sampleDevice.cpp +++ b/samples/common/sampleDevice.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/sampleDevice.h b/samples/common/sampleDevice.h index ad122180..5e62f6d0 100644 --- a/samples/common/sampleDevice.h +++ b/samples/common/sampleDevice.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/sampleEngines.cpp b/samples/common/sampleEngines.cpp index bea07a53..b39d513b 100644 --- a/samples/common/sampleEngines.cpp +++ b/samples/common/sampleEngines.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -654,7 +654,15 @@ void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build) } if (build.tacticSharedMem >= 0) { - config.setMemoryPoolLimit(MemoryPoolType::kTACTIC_SHARED_MEMORY, roundToBytes(build.tacticSharedMem)); + if (build.tacticSharedMem >= 0.046 && build.tacticSharedMem <= 0.047) + { + // 48KB is a common use case but user might not type the exact number 0.046875MB. + config.setMemoryPoolLimit(MemoryPoolType::kTACTIC_SHARED_MEMORY, 48 << 10); + } + else + { + config.setMemoryPoolLimit(MemoryPoolType::kTACTIC_SHARED_MEMORY, roundToBytes(build.tacticSharedMem)); + } } } diff --git a/samples/common/sampleEngines.h b/samples/common/sampleEngines.h index f6cff080..4c4272b7 100644 --- a/samples/common/sampleEngines.h +++ b/samples/common/sampleEngines.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/sampleEntrypoints.h b/samples/common/sampleEntrypoints.h index 70f45dde..cc8bf1b9 100644 --- a/samples/common/sampleEntrypoints.h +++ b/samples/common/sampleEntrypoints.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/sampleInference.cpp b/samples/common/sampleInference.cpp index dfc76708..024dd6f6 100644 --- a/samples/common/sampleInference.cpp +++ b/samples/common/sampleInference.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -620,8 +620,10 @@ class EnqueueExplicit : private Enqueue try { bool const result = mContext.enqueueV3(stream.get()); - // Collecting layer timing info from current profile index of execution context - if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() && !mContext.reportToProfiler()) + // Collecting layer timing info from current profile index of execution context, except under capturing + // mode. + if (!isStreamCapturing(stream) && mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() + && !mContext.reportToProfiler()) { gLogWarning << "Failed to collect layer timing info from previous enqueueV3()" << std::endl; } @@ -635,6 +637,14 @@ class EnqueueExplicit : private Enqueue } private: + // Helper function to check if a stream is in capturing mode. 
+ bool isStreamCapturing(TrtCudaStream& stream) const + { + cudaStreamCaptureStatus status{cudaStreamCaptureStatusNone}; + cudaCheck(cudaStreamIsCapturing(stream.get(), &status)); + return status != cudaStreamCaptureStatusNone; + } + Bindings const& mBindings; }; @@ -931,6 +941,8 @@ class Iteration mEnqueue = EnqueueFunction(EnqueueExplicit(context, mBindings)); if (inference.graph) { + sample::gLogInfo << "Capturing CUDA graph for the current execution context" << std::endl; + TrtCudaStream& stream = getStream(StreamType::kCOMPUTE); // Avoid capturing initialization calls by executing the enqueue function at least // once before starting CUDA graph capture. @@ -948,6 +960,7 @@ class Iteration { mGraph.endCapture(stream); mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph)); + sample::gLogInfo << "Successfully captured CUDA graph for the current execution context" << std::endl; } else { diff --git a/samples/common/sampleInference.h b/samples/common/sampleInference.h index e726cb31..e8e53bb7 100644 --- a/samples/common/sampleInference.h +++ b/samples/common/sampleInference.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/sampleOptions.cpp b/samples/common/sampleOptions.cpp index 575668e1..7f2bd9f1 100644 --- a/samples/common/sampleOptions.cpp +++ b/samples/common/sampleOptions.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/sampleOptions.h b/samples/common/sampleOptions.h index 00e8b15d..cddbc60d 100644 --- a/samples/common/sampleOptions.h +++ b/samples/common/sampleOptions.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/sampleReporting.cpp b/samples/common/sampleReporting.cpp index 3c8efab0..1d3e2ca5 100644 --- a/samples/common/sampleReporting.cpp +++ b/samples/common/sampleReporting.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/sampleReporting.h b/samples/common/sampleReporting.h index 8cab62ba..c6813fe6 100644 --- a/samples/common/sampleReporting.h +++ b/samples/common/sampleReporting.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/sampleUtils.cpp b/samples/common/sampleUtils.cpp index 7f827bc8..522cde65 100644 --- a/samples/common/sampleUtils.cpp +++ b/samples/common/sampleUtils.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/sampleUtils.h b/samples/common/sampleUtils.h index 32d5f1b0..6cd4280b 100644 --- a/samples/common/sampleUtils.h +++ b/samples/common/sampleUtils.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/common/streamReader.h b/samples/common/streamReader.h index 657e35b8..7d4aa1c6 100644 --- a/samples/common/streamReader.h +++ b/samples/common/streamReader.h @@ -18,7 +18,7 @@ #ifndef STREAM_READER_H #define STREAM_READER_H -#include "NvInferRuntimeBase.h" +#include "NvInferRuntime.h" #include "sampleUtils.h" #include diff --git a/samples/python/common.py b/samples/python/common.py index f289c366..10b2c323 100644 --- a/samples/python/common.py +++ b/samples/python/common.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,16 +27,21 @@ except NameError: FileNotFoundError = IOError + def GiB(val): return val * 1 << 30 def add_help(description): - parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = argparse.ArgumentParser( + description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) args, _ = parser.parse_known_args() -def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[], err_msg=""): +def find_sample_data( + description="Runs a TensorRT Python sample", subfolder="", find_files=[], err_msg="" +): """ Parses sample arguments. @@ -51,7 +56,9 @@ def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", # Standard command-line arguments for all samples. kDEFAULT_DATA_ROOT = os.path.join(os.sep, "usr", "src", "tensorrt", "data") - parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = argparse.ArgumentParser( + description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) parser.add_argument( "-d", "--datadir", @@ -66,7 +73,13 @@ def get_data_path(data_dir): data_path = os.path.join(data_dir, subfolder) if not os.path.exists(data_path): if data_dir != kDEFAULT_DATA_ROOT: - print("WARNING: " + data_path + " does not exist. Trying " + data_dir + " instead.") + print( + "WARNING: " + + data_path + + " does not exist. Trying " + + data_dir + + " instead." + ) data_path = data_dir # Make sure data directory exists. if not (os.path.exists(data_path)) and data_dir != kDEFAULT_DATA_ROOT: @@ -109,10 +122,13 @@ def locate_files(data_paths, filenames, err_msg=""): for f, filename in zip(found_files, filenames): if not f or not os.path.exists(f): raise FileNotFoundError( - "Could not find {:}. 
Searched in data paths: {:}\n{:}".format(filename, data_paths, err_msg) + "Could not find {:}. Searched in data paths: {:}\n{:}".format( + filename, data_paths, err_msg + ) ) return found_files + # Sets up the builder to use the timing cache file, and creates it if it does not already exist def setup_timing_cache(config: trt.IBuilderConfig, timing_cache_path: os.PathLike): buffer = b"" @@ -122,8 +138,9 @@ def setup_timing_cache(config: trt.IBuilderConfig, timing_cache_path: os.PathLik timing_cache: trt.ITimingCache = config.create_timing_cache(buffer) config.set_timing_cache(timing_cache, True) + # Saves the config's timing cache to file def save_timing_cache(config: trt.IBuilderConfig, timing_cache_path: os.PathLike): timing_cache: trt.ITimingCache = config.get_timing_cache() - with open(timing_cache_path, 'wb') as timing_cache_file: + with open(timing_cache_path, "wb") as timing_cache_file: timing_cache_file.write(memoryview(timing_cache.serialize())) diff --git a/samples/python/detectron2/build_engine.py b/samples/python/detectron2/build_engine.py index aa6f5795..c62b941c 100644 --- a/samples/python/detectron2/build_engine.py +++ b/samples/python/detectron2/build_engine.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -33,6 +33,7 @@ sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) import common + class EngineCalibrator(trt.IInt8MinMaxCalibrator): """ Implements the INT8 MinMax Calibrator. 
@@ -55,7 +56,10 @@ def set_image_batcher(self, image_batcher: ImageBatcher): :param image_batcher: The ImageBatcher object """ self.image_batcher = image_batcher - self.size = int(np.dtype(self.image_batcher.dtype).itemsize * np.prod(self.image_batcher.shape)) + self.size = int( + np.dtype(self.image_batcher.dtype).itemsize + * np.prod(self.image_batcher.shape) + ) self.batch_allocation = common.cuda_call(cudart.cudaMalloc(self.size)) self.batch_generator = self.image_batcher.get_batch() @@ -80,8 +84,14 @@ def get_batch(self, names): return None try: batch, _, _ = next(self.batch_generator) - log.info("Calibrating image {} / {}".format(self.image_batcher.image_index, self.image_batcher.num_images)) - common.memcpy_host_to_device(self.batch_allocation, np.ascontiguousarray(batch)) + log.info( + "Calibrating image {} / {}".format( + self.image_batcher.image_index, self.image_batcher.num_images + ) + ) + common.memcpy_host_to_device( + self.batch_allocation, np.ascontiguousarray(batch) + ) return [int(self.batch_allocation)] except StopIteration: @@ -130,7 +140,9 @@ def __init__(self, verbose=False, workspace=8): self.builder = trt.Builder(self.trt_logger) self.config = self.builder.create_builder_config() - self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace * (2 ** 30)) + self.config.set_memory_pool_limit( + trt.MemoryPoolType.WORKSPACE, workspace * (2**30) + ) self.batch_size = None self.network = None @@ -158,13 +170,29 @@ def create_network(self, onnx_path): log.info("Network Description") for input in inputs: self.batch_size = input.shape[0] - log.info("Input '{}' with shape {} and dtype {}".format(input.name, input.shape, input.dtype)) + log.info( + "Input '{}' with shape {} and dtype {}".format( + input.name, input.shape, input.dtype + ) + ) for output in outputs: - log.info("Output '{}' with shape {} and dtype {}".format(output.name, output.shape, output.dtype)) + log.info( + "Output '{}' with shape {} and dtype {}".format( + 
output.name, output.shape, output.dtype + ) + ) assert self.batch_size > 0 - def create_engine(self, engine_path, precision, config_file, calib_input=None, calib_cache=None, calib_num_images=5000, - calib_batch_size=8): + def create_engine( + self, + engine_path, + precision, + config_file, + calib_input=None, + calib_cache=None, + calib_num_images=5000, + calib_batch_size=8, + ): """ Build the TensorRT engine and serialize it to disk. :param engine_path: The path where to serialize the engine to. @@ -194,8 +222,15 @@ def create_engine(self, engine_path, precision, config_file, calib_input=None, c calib_shape = [calib_batch_size] + list(inputs[0].shape[1:]) calib_dtype = trt.nptype(inputs[0].dtype) self.config.int8_calibrator.set_image_batcher( - ImageBatcher(calib_input, calib_shape, calib_dtype, max_num_images=calib_num_images, - exact_batches=True, config_file=config_file)) + ImageBatcher( + calib_input, + calib_shape, + calib_dtype, + max_num_images=calib_num_images, + exact_batches=True, + config_file=config_file, + ) + ) engine_bytes = self.builder.build_serialized_network(self.network, self.config) if engine_bytes is None: @@ -210,34 +245,76 @@ def create_engine(self, engine_path, precision, config_file, calib_input=None, c def main(args): builder = EngineBuilder(args.verbose, args.workspace) builder.create_network(args.onnx) - builder.create_engine(args.engine, args.precision, args.det2_config, args.calib_input, args.calib_cache, args.calib_num_images, - args.calib_batch_size) + builder.create_engine( + args.engine, + args.precision, + args.det2_config, + args.calib_input, + args.calib_cache, + args.calib_num_images, + args.calib_batch_size, + ) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-o", "--onnx", help="The input ONNX model file to load") parser.add_argument("-e", "--engine", help="The output path for the TRT engine") - parser.add_argument("-c", "--det2_config", default=None, help="The Detectron 2 config file 
(.yaml) for the model", type=str) - parser.add_argument("-p", "--precision", default="fp16", choices=["fp32", "fp16", "int8"], - help="The precision mode to build in, either fp32/fp16/int8, default: 'fp16'") - parser.add_argument("-v", "--verbose", action="store_true", help="Enable more verbose log output") - parser.add_argument("-w", "--workspace", default=1, type=int, help="The max memory workspace size to allow in Gb, " - "default: 1") - parser.add_argument("--calib_input", help="The directory holding images to use for calibration") - parser.add_argument("--calib_cache", default="./calibration.cache", - help="The file path for INT8 calibration cache to use, default: ./calibration.cache") - parser.add_argument("--calib_num_images", default=5000, type=int, - help="The maximum number of images to use for calibration, default: 5000") - parser.add_argument("--calib_batch_size", default=8, type=int, - help="The batch size for the calibration process, default: 8") + parser.add_argument( + "-c", + "--det2_config", + default=None, + help="The Detectron 2 config file (.yaml) for the model", + type=str, + ) + parser.add_argument( + "-p", + "--precision", + default="fp16", + choices=["fp32", "fp16", "int8"], + help="The precision mode to build in, either fp32/fp16/int8, default: 'fp16'", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="Enable more verbose log output" + ) + parser.add_argument( + "-w", + "--workspace", + default=1, + type=int, + help="The max memory workspace size to allow in Gb, " "default: 1", + ) + parser.add_argument( + "--calib_input", help="The directory holding images to use for calibration" + ) + parser.add_argument( + "--calib_cache", + default="./calibration.cache", + help="The file path for INT8 calibration cache to use, default: ./calibration.cache", + ) + parser.add_argument( + "--calib_num_images", + default=5000, + type=int, + help="The maximum number of images to use for calibration, default: 5000", + ) + 
parser.add_argument( + "--calib_batch_size", + default=8, + type=int, + help="The batch size for the calibration process, default: 8", + ) args = parser.parse_args() if not all([args.onnx, args.engine]): parser.print_help() log.error("These arguments are required: --onnx and --engine") sys.exit(1) - if args.precision in ["int8"] and not (args.calib_input or os.path.exists(args.calib_cache)): + if args.precision in ["int8"] and not ( + args.calib_input or os.path.exists(args.calib_cache) + ): parser.print_help() - log.error("When building in int8 precision, --calib_input or an existing --calib_cache file is required") + log.error( + "When building in int8 precision, --calib_input or an existing --calib_cache file is required" + ) sys.exit(1) main(args) diff --git a/samples/python/detectron2/create_onnx.py b/samples/python/detectron2/create_onnx.py index 38538464..478ead75 100644 --- a/samples/python/detectron2/create_onnx.py +++ b/samples/python/detectron2/create_onnx.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -34,7 +34,9 @@ from detectron2.structures import ImageList except ImportError: print("Could not import Detectron 2 modules. 
Maybe you did not install Detectron 2") - print("Please install Detectron 2, check https://github.com/facebookresearch/detectron2/blob/main/INSTALL.md") + print( + "Please install Detectron 2, check https://github.com/facebookresearch/detectron2/blob/main/INSTALL.md" + ) sys.exit(1) import onnx_utils @@ -81,14 +83,24 @@ def det2_setup(config_file, weights): self.first_NMS_max_proposals = self.det2_cfg.MODEL.RPN.POST_NMS_TOPK_TEST self.first_NMS_iou_threshold = self.det2_cfg.MODEL.RPN.NMS_THRESH self.first_NMS_score_threshold = 0.01 - self.first_ROIAlign_pooled_size = self.det2_cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - self.first_ROIAlign_sampling_ratio = self.det2_cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + self.first_ROIAlign_pooled_size = ( + self.det2_cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + ) + self.first_ROIAlign_sampling_ratio = ( + self.det2_cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + ) self.first_ROIAlign_type = self.det2_cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE self.second_NMS_max_proposals = self.det2_cfg.TEST.DETECTIONS_PER_IMAGE self.second_NMS_iou_threshold = self.det2_cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST - self.second_NMS_score_threshold = self.det2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST - self.second_ROIAlign_pooled_size = self.det2_cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION - self.second_ROIAlign_sampling_ratio = self.det2_cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO + self.second_NMS_score_threshold = ( + self.det2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST + ) + self.second_ROIAlign_pooled_size = ( + self.det2_cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION + ) + self.second_ROIAlign_sampling_ratio = ( + self.det2_cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO + ) self.second_ROIAlign_type = self.det2_cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE self.mask_out_res = 28 @@ -97,17 +109,37 @@ def det2_setup(config_file, weights): log.info("Number of classes is {}".format(self.num_classes)) log.info("First NMS max proposals is {}".format(self.first_NMS_max_proposals)) 
log.info("First NMS iou threshold is {}".format(self.first_NMS_iou_threshold)) - log.info("First NMS score threshold is {}".format(self.first_NMS_score_threshold)) + log.info( + "First NMS score threshold is {}".format(self.first_NMS_score_threshold) + ) log.info("First ROIAlign type is {}".format(self.first_ROIAlign_type)) - log.info("First ROIAlign pooled size is {}".format(self.first_ROIAlign_pooled_size)) - log.info("First ROIAlign sampling ratio is {}".format(self.first_ROIAlign_sampling_ratio)) + log.info( + "First ROIAlign pooled size is {}".format(self.first_ROIAlign_pooled_size) + ) + log.info( + "First ROIAlign sampling ratio is {}".format( + self.first_ROIAlign_sampling_ratio + ) + ) log.info("Second NMS max proposals is {}".format(self.second_NMS_max_proposals)) log.info("Second NMS iou threshold is {}".format(self.second_NMS_iou_threshold)) - log.info("Second NMS score threshold is {}".format(self.second_NMS_score_threshold)) + log.info( + "Second NMS score threshold is {}".format(self.second_NMS_score_threshold) + ) log.info("Second ROIAlign type is {}".format(self.second_ROIAlign_type)) - log.info("Second ROIAlign pooled size is {}".format(self.second_ROIAlign_pooled_size)) - log.info("Second ROIAlign sampling ratio is {}".format(self.second_ROIAlign_sampling_ratio)) - log.info("Individual mask output resolution is {}x{}".format(self.mask_out_res, self.mask_out_res)) + log.info( + "Second ROIAlign pooled size is {}".format(self.second_ROIAlign_pooled_size) + ) + log.info( + "Second ROIAlign sampling ratio is {}".format( + self.second_ROIAlign_sampling_ratio + ) + ) + log.info( + "Individual mask output resolution is {}x{}".format( + self.mask_out_res, self.mask_out_res + ) + ) self.batch_size = None @@ -128,12 +160,16 @@ def sanitize(self): model = shape_inference.infer_shapes(model) self.graph = gs.import_onnx(model) except Exception as e: - log.info("Shape inference could not be performed at this time:\n{}".format(e)) + log.info( + "Shape inference 
could not be performed at this time:\n{}".format(e) + ) try: self.graph.fold_constants(fold_shapes=True) except TypeError as e: - log.error("This version of ONNX GraphSurgeon does not support folding shapes, please upgrade your " - "onnx_graphsurgeon module. Error:\n{}".format(e)) + log.error( + "This version of ONNX GraphSurgeon does not support folding shapes, please upgrade your " + "onnx_graphsurgeon module. Error:\n{}".format(e) + ) raise count_after = len(self.graph.nodes) @@ -182,7 +218,9 @@ def get_anchors(self, sample_image): p4_anchors = det2_anchors[2].tensor.detach().cpu().numpy() p5_anchors = det2_anchors[3].tensor.detach().cpu().numpy() p6_anchors = det2_anchors[4].tensor.detach().cpu().numpy() - final_anchors = np.concatenate((p2_anchors,p3_anchors,p4_anchors,p5_anchors,p6_anchors)) + final_anchors = np.concatenate( + (p2_anchors, p3_anchors, p4_anchors, p5_anchors, p6_anchors) + ) return final_anchors @@ -214,18 +252,29 @@ def update_preprocessor(self, batch_size): self.graph.inputs[0].name = "input_tensor" self.sanitize() - log.info("ONNX graph input shape: {} [NCHW format set]".format(self.graph.inputs[0].shape)) + log.info( + "ONNX graph input shape: {} [NCHW format set]".format( + self.graph.inputs[0].shape + ) + ) # Find the initial nodes of the graph, whatever the input is first connected to, and disconnect them. - for node in [node for node in self.graph.nodes if self.graph.inputs[0] in node.inputs]: + for node in [ + node for node in self.graph.nodes if self.graph.inputs[0] in node.inputs + ]: node.inputs.clear() # Get input tensor. input_tensor = self.graph.inputs[0] # Create preprocessing Sub node and connect input tensor to it. 
- sub_const = np.expand_dims(np.asarray([255 * 0.406, 255 * 0.456, 255 * 0.485], dtype=np.float32), axis=(1, 2)) - sub_out = self.graph.op_with_const("Sub", "preprocessor/mean", input_tensor, sub_const) + sub_const = np.expand_dims( + np.asarray([255 * 0.406, 255 * 0.456, 255 * 0.485], dtype=np.float32), + axis=(1, 2), + ) + sub_out = self.graph.op_with_const( + "Sub", "preprocessor/mean", input_tensor, sub_const + ) # Find first Div node and connect to output of Sub node. div_node = self.graph.find_node_by_op("Div") @@ -242,7 +291,19 @@ def update_preprocessor(self, batch_size): if type(node.inputs[1]) == gs.Constant and node.inputs[1].values[0] == 1: node.inputs[1].values[0] = self.batch_size - def NMS(self, boxes, scores, anchors, background_class, score_activation, max_proposals, iou_threshold, nms_score_threshold, user_threshold, nms_name=None): + def NMS( + self, + boxes, + scores, + anchors, + background_class, + score_activation, + max_proposals, + iou_threshold, + nms_score_threshold, + user_threshold, + nms_name=None, + ): # Helper function to create the NMS Plugin node with the selected inputs. # EfficientNMS_TRT TensorRT Plugin is suitable for our use case. # :param boxes: The box predictions from the Box Net. @@ -263,41 +324,71 @@ def NMS(self, boxes, scores, anchors, background_class, score_activation, max_pr nms_name = "_" + nms_name # Set score threshold. - score_threshold = nms_score_threshold if user_threshold is None else user_threshold + score_threshold = ( + nms_score_threshold if user_threshold is None else user_threshold + ) # NMS Outputs. 
- nms_output_num_detections = gs.Variable(name="num_detections"+nms_name, dtype=np.int32, shape=[self.batch_size, 1]) - nms_output_boxes = gs.Variable(name="detection_boxes"+nms_name, dtype=np.float32, - shape=[self.batch_size, max_proposals, 4]) - nms_output_scores = gs.Variable(name="detection_scores"+nms_name, dtype=np.float32, - shape=[self.batch_size, max_proposals]) - nms_output_classes = gs.Variable(name="detection_classes"+nms_name, dtype=np.int32, - shape=[self.batch_size, max_proposals]) + nms_output_num_detections = gs.Variable( + name="num_detections" + nms_name, dtype=np.int32, shape=[self.batch_size, 1] + ) + nms_output_boxes = gs.Variable( + name="detection_boxes" + nms_name, + dtype=np.float32, + shape=[self.batch_size, max_proposals, 4], + ) + nms_output_scores = gs.Variable( + name="detection_scores" + nms_name, + dtype=np.float32, + shape=[self.batch_size, max_proposals], + ) + nms_output_classes = gs.Variable( + name="detection_classes" + nms_name, + dtype=np.int32, + shape=[self.batch_size, max_proposals], + ) - nms_outputs = [nms_output_num_detections, nms_output_boxes, nms_output_scores, nms_output_classes] + nms_outputs = [ + nms_output_num_detections, + nms_output_boxes, + nms_output_scores, + nms_output_classes, + ] # Plugin. 
self.graph.plugin( op="EfficientNMS_TRT", - name="nms"+nms_name, + name="nms" + nms_name, inputs=[boxes, scores, anchors], outputs=nms_outputs, attrs={ - 'plugin_version': "1", - 'background_class': background_class, - 'max_output_boxes': max_proposals, - 'score_threshold': max(0.01, score_threshold), - 'iou_threshold': iou_threshold, - 'score_activation': score_activation, - 'class_agnostic': False, - 'box_coding': 1, - } + "plugin_version": "1", + "background_class": background_class, + "max_output_boxes": max_proposals, + "score_threshold": max(0.01, score_threshold), + "iou_threshold": iou_threshold, + "score_activation": score_activation, + "class_agnostic": False, + "box_coding": 1, + }, ) log.info("Created nms{} with EfficientNMS_TRT plugin".format(nms_name)) return nms_outputs - def ROIAlign(self, rois, p2, p3, p4, p5, pooled_size, sampling_ratio, roi_align_type, num_rois, ra_name): + def ROIAlign( + self, + rois, + p2, + p3, + p4, + p5, + pooled_size, + sampling_ratio, + roi_align_type, + num_rois, + ra_name, + ): # Helper function to create the ROIAlign Plugin node with the selected inputs. # PyramidROIAlign_TRT TensorRT Plugin is suitable for our use case. # :param rois: Regions of interest/detection boxes outputs from preceding NMS node. @@ -318,31 +409,42 @@ def ROIAlign(self, rois, p2, p3, p4, p5, pooled_size, sampling_ratio, roi_align_ roi_coords_transform = 0 # ROIAlign outputs. - roi_align_output = gs.Variable(name="roi_align/output_"+ra_name, dtype=np.float32, - shape=[self.batch_size, num_rois, self.fpn_out_channels, pooled_size, pooled_size]) + roi_align_output = gs.Variable( + name="roi_align/output_" + ra_name, + dtype=np.float32, + shape=[ + self.batch_size, + num_rois, + self.fpn_out_channels, + pooled_size, + pooled_size, + ], + ) # Plugin. 
self.graph.plugin( op="PyramidROIAlign_TRT", - name="roi_align_"+ra_name, + name="roi_align_" + ra_name, inputs=[rois, p2, p3, p4, p5], outputs=[roi_align_output], attrs={ - 'plugin_version': "1", - 'fpn_scale': 224, - 'pooled_size': pooled_size, - 'image_size': [self.height, self.width], - 'roi_coords_absolute': 0, - 'roi_coords_swap': 0, - 'roi_coords_transform': roi_coords_transform, - 'sampling_ratio': sampling_ratio, - } + "plugin_version": "1", + "fpn_scale": 224, + "pooled_size": pooled_size, + "image_size": [self.height, self.width], + "roi_coords_absolute": 0, + "roi_coords_swap": 0, + "roi_coords_transform": roi_coords_transform, + "sampling_ratio": sampling_ratio, + }, ) log.info("Created {} with PyramidROIAlign_TRT plugin".format(ra_name)) return roi_align_output - def process_graph(self, anchors, first_nms_threshold=None, second_nms_threshold=None): + def process_graph( + self, anchors, first_nms_threshold=None, second_nms_threshold=None + ): """ Processes the graph to replace the GenerateProposals and BoxWithNMSLimit operations with EfficientNMS_TRT TensorRT plugin nodes and ROIAlign operations with PyramidROIAlign_TRT plugin nodes. @@ -351,6 +453,7 @@ def process_graph(self, anchors, first_nms_threshold=None, second_nms_threshold= :param first_nms_threshold: Override the 1st NMS score threshold value. If set to None, use the value in the graph. :param second_nms_threshold: Override the 2nd NMS score threshold value. If set to None, use the value in the graph. """ + def backbone(): """ Updates the graph to replace all ResizeNearest ops with ResizeNearest plugins in backbone. 
@@ -361,7 +464,6 @@ def backbone(): p4 = self.graph.find_node_by_op_name("Conv", "/backbone/fpn_output4/Conv") p5 = self.graph.find_node_by_op_name("Conv", "/backbone/fpn_output5/Conv") - return p2.outputs[0], p3.outputs[0], p4.outputs[0], p5.outputs[0] def proposal_generator(anchors, first_nms_threshold): @@ -372,38 +474,101 @@ def proposal_generator(anchors, first_nms_threshold): :param first_nms_threshold: Override the 1st NMS score threshold value. If set to None, use the value in the graph. """ # Get nodes containing final objectness logits. - p2_logits = self.graph.find_node_by_op_name("Flatten", "/proposal_generator/Flatten") - p3_logits = self.graph.find_node_by_op_name("Flatten", "/proposal_generator/Flatten_1") - p4_logits = self.graph.find_node_by_op_name("Flatten", "/proposal_generator/Flatten_2") - p5_logits = self.graph.find_node_by_op_name("Flatten", "/proposal_generator/Flatten_3") - p6_logits = self.graph.find_node_by_op_name("Flatten", "/proposal_generator/Flatten_4") + p2_logits = self.graph.find_node_by_op_name( + "Flatten", "/proposal_generator/Flatten" + ) + p3_logits = self.graph.find_node_by_op_name( + "Flatten", "/proposal_generator/Flatten_1" + ) + p4_logits = self.graph.find_node_by_op_name( + "Flatten", "/proposal_generator/Flatten_2" + ) + p5_logits = self.graph.find_node_by_op_name( + "Flatten", "/proposal_generator/Flatten_3" + ) + p6_logits = self.graph.find_node_by_op_name( + "Flatten", "/proposal_generator/Flatten_4" + ) # Get nodes containing final anchor_deltas. 
- p2_anchors = self.graph.find_node_by_op_name("Reshape", "/proposal_generator/Reshape_1") - p3_anchors = self.graph.find_node_by_op_name("Reshape", "/proposal_generator/Reshape_3") - p4_anchors = self.graph.find_node_by_op_name("Reshape", "/proposal_generator/Reshape_5") - p5_anchors = self.graph.find_node_by_op_name("Reshape", "/proposal_generator/Reshape_7") - p6_anchors = self.graph.find_node_by_op_name("Reshape", "/proposal_generator/Reshape_9") + p2_anchors = self.graph.find_node_by_op_name( + "Reshape", "/proposal_generator/Reshape_1" + ) + p3_anchors = self.graph.find_node_by_op_name( + "Reshape", "/proposal_generator/Reshape_3" + ) + p4_anchors = self.graph.find_node_by_op_name( + "Reshape", "/proposal_generator/Reshape_5" + ) + p5_anchors = self.graph.find_node_by_op_name( + "Reshape", "/proposal_generator/Reshape_7" + ) + p6_anchors = self.graph.find_node_by_op_name( + "Reshape", "/proposal_generator/Reshape_9" + ) # Concatenate all objectness logits/scores data. - scores_inputs = [p2_logits.outputs[0], p3_logits.outputs[0], p4_logits.outputs[0], p5_logits.outputs[0], p6_logits.outputs[0]] - scores_tensor = self.graph.layer(name="scores", op="Concat", inputs=scores_inputs, outputs=['scores'], attrs={'axis': 1})[0] + scores_inputs = [ + p2_logits.outputs[0], + p3_logits.outputs[0], + p4_logits.outputs[0], + p5_logits.outputs[0], + p6_logits.outputs[0], + ] + scores_tensor = self.graph.layer( + name="scores", + op="Concat", + inputs=scores_inputs, + outputs=["scores"], + attrs={"axis": 1}, + )[0] # Unsqueeze to add 3rd dimension of 1 to match tensor dimensions of boxes tensor. scores = self.graph.unsqueeze("scores_unsqueeze", scores_tensor, [2])[0] # Concatenate all boxes/anchor_delta data. 
- boxes_inputs = [p2_anchors.outputs[0], p3_anchors.outputs[0], p4_anchors.outputs[0], p5_anchors.outputs[0], p6_anchors.outputs[0]] - boxes = self.graph.layer(name="boxes", op="Concat", inputs=boxes_inputs, outputs=['anchors'], attrs={'axis': 1})[0] + boxes_inputs = [ + p2_anchors.outputs[0], + p3_anchors.outputs[0], + p4_anchors.outputs[0], + p5_anchors.outputs[0], + p6_anchors.outputs[0], + ] + boxes = self.graph.layer( + name="boxes", + op="Concat", + inputs=boxes_inputs, + outputs=["anchors"], + attrs={"axis": 1}, + )[0] # Convert the anchors from Corners to CenterSize encoding. - anchors = np.matmul(anchors, [[0.5, 0, -1, 0], [0, 0.5, 0, -1], [0.5, 0, 1, 0], [0, 0.5, 0, 1]]) - anchors = anchors / [self.width, self.height, self.width, self.height] # Normalize anchors to [0-1] range + anchors = np.matmul( + anchors, + [[0.5, 0, -1, 0], [0, 0.5, 0, -1], [0.5, 0, 1, 0], [0, 0.5, 0, 1]], + ) + anchors = anchors / [ + self.width, + self.height, + self.width, + self.height, + ] # Normalize anchors to [0-1] range anchors = np.expand_dims(anchors, axis=0) anchors = anchors.astype(np.float32) anchors = gs.Constant(name="default_anchors", values=anchors) # Create NMS node. - nms_outputs = self.NMS(boxes, scores, anchors, -1, False, self.first_NMS_max_proposals, self.first_NMS_iou_threshold, self.first_NMS_score_threshold, first_nms_threshold, 'rpn') + nms_outputs = self.NMS( + boxes, + scores, + anchors, + -1, + False, + self.first_NMS_max_proposals, + self.first_NMS_iou_threshold, + self.first_NMS_score_threshold, + first_nms_threshold, + "rpn", + ) return nms_outputs @@ -422,63 +587,149 @@ def roi_heads(rpn_outputs, p2, p3, p4, p5, second_nms_threshold): :param second_nms_threshold: Override the 2nd NMS score threshold value. If set to None, use the value in the graph. """ # Create ROIAlign node. 
- box_pooler_output = self.ROIAlign(rpn_outputs[1], p2, p3, p4, p5, self.first_ROIAlign_pooled_size, self.first_ROIAlign_sampling_ratio, self.first_ROIAlign_type, self.first_NMS_max_proposals, 'box_pooler') + box_pooler_output = self.ROIAlign( + rpn_outputs[1], + p2, + p3, + p4, + p5, + self.first_ROIAlign_pooled_size, + self.first_ROIAlign_sampling_ratio, + self.first_ROIAlign_type, + self.first_NMS_max_proposals, + "box_pooler", + ) # Reshape node that prepares ROIAlign/box pooler output for Gemm node that comes next. - box_pooler_shape = np.asarray([-1, self.fpn_out_channels*self.first_ROIAlign_pooled_size*self.first_ROIAlign_pooled_size], dtype=np.int64) - box_pooler_reshape = self.graph.op_with_const("Reshape", "box_pooler/reshape", box_pooler_output, box_pooler_shape) + box_pooler_shape = np.asarray( + [ + -1, + self.fpn_out_channels + * self.first_ROIAlign_pooled_size + * self.first_ROIAlign_pooled_size, + ], + dtype=np.int64, + ) + box_pooler_reshape = self.graph.op_with_const( + "Reshape", "box_pooler/reshape", box_pooler_output, box_pooler_shape + ) # Get first Gemm op of box head and connect box pooler to it. - first_box_head_gemm = self.graph.find_node_by_op_name("Gemm", "/roi_heads/box_head/fc1/Gemm") + first_box_head_gemm = self.graph.find_node_by_op_name( + "Gemm", "/roi_heads/box_head/fc1/Gemm" + ) first_box_head_gemm.inputs[0] = box_pooler_reshape[0] # Get final two nodes of box predictor. Softmax op for cls_score, Gemm op for bbox_pred. cls_score = self.graph.find_node_by_op_name("Softmax", "/roi_heads/Softmax") - bbox_pred = self.graph.find_node_by_op_name("Gemm", "/roi_heads/box_predictor/bbox_pred/Gemm") + bbox_pred = self.graph.find_node_by_op_name( + "Gemm", "/roi_heads/box_predictor/bbox_pred/Gemm" + ) # Linear transformation to convert box coordinates from (TopLeft, BottomRight) Corner encoding # to CenterSize encoding. 1st NMS boxes are multiplied by transformation matrix in order to # encode it into CenterSize format. 
- matmul_const = np.matrix('0.5 0 -1 0; 0 0.5 0 -1; 0.5 0 1 0; 0 0.5 0 1', dtype=np.float32) - matmul_out = self.graph.matmul("RPN_NMS/detection_boxes_conversion", rpn_outputs[1], matmul_const) + matmul_const = np.matrix( + "0.5 0 -1 0; 0 0.5 0 -1; 0.5 0 1 0; 0 0.5 0 1", dtype=np.float32 + ) + matmul_out = self.graph.matmul( + "RPN_NMS/detection_boxes_conversion", rpn_outputs[1], matmul_const + ) # Reshape node that prepares bbox_pred for scaling and second NMS. - bbox_pred_shape = np.asarray([self.batch_size, self.first_NMS_max_proposals, self.num_classes, 4], dtype=np.int64) - bbox_pred_reshape = self.graph.op_with_const("Reshape", "bbox_pred/reshape", bbox_pred.outputs[0], bbox_pred_shape) + bbox_pred_shape = np.asarray( + [self.batch_size, self.first_NMS_max_proposals, self.num_classes, 4], + dtype=np.int64, + ) + bbox_pred_reshape = self.graph.op_with_const( + "Reshape", "bbox_pred/reshape", bbox_pred.outputs[0], bbox_pred_shape + ) # 0.1, 0.1, 0.2, 0.2 are localization head variance numbers, they scale bbox_pred_reshape, in order to get accurate coordinates. - scale_adj = np.expand_dims(np.asarray([0.1, 0.1, 0.2, 0.2], dtype=np.float32), axis=(0, 1)) - final_bbox_pred = self.graph.op_with_const("Mul", "bbox_pred/scale", bbox_pred_reshape[0], scale_adj) + scale_adj = np.expand_dims( + np.asarray([0.1, 0.1, 0.2, 0.2], dtype=np.float32), axis=(0, 1) + ) + final_bbox_pred = self.graph.op_with_const( + "Mul", "bbox_pred/scale", bbox_pred_reshape[0], scale_adj + ) # Reshape node that prepares cls_score for slicing and second NMS. 
- cls_score_shape = np.array([self.batch_size, self.first_NMS_max_proposals, self.num_classes+1], dtype=np.int64) - cls_score_reshape = self.graph.op_with_const("Reshape", "cls_score/reshape", cls_score.outputs[0], cls_score_shape) + cls_score_shape = np.array( + [self.batch_size, self.first_NMS_max_proposals, self.num_classes + 1], + dtype=np.int64, + ) + cls_score_reshape = self.graph.op_with_const( + "Reshape", "cls_score/reshape", cls_score.outputs[0], cls_score_shape + ) # Slice operation to adjust third dimension of cls_score tensor, deletion of background class (81 in Detectron 2). - final_cls_score = self.graph.slice("cls_score/slicer", cls_score_reshape[0], 0, self.num_classes, 2) + final_cls_score = self.graph.slice( + "cls_score/slicer", cls_score_reshape[0], 0, self.num_classes, 2 + ) # Create NMS node. - nms_outputs = self.NMS(final_bbox_pred[0], final_cls_score[0], matmul_out[0], -1, False, self.second_NMS_max_proposals, self.second_NMS_iou_threshold, self.second_NMS_score_threshold, second_nms_threshold, 'box_outputs') + nms_outputs = self.NMS( + final_bbox_pred[0], + final_cls_score[0], + matmul_out[0], + -1, + False, + self.second_NMS_max_proposals, + self.second_NMS_iou_threshold, + self.second_NMS_score_threshold, + second_nms_threshold, + "box_outputs", + ) # Create ROIAlign node. - mask_pooler_output = self.ROIAlign(nms_outputs[1], p2, p3, p4, p5, self.second_ROIAlign_pooled_size, self.second_ROIAlign_sampling_ratio, self.second_ROIAlign_type, self.second_NMS_max_proposals, 'mask_pooler') + mask_pooler_output = self.ROIAlign( + nms_outputs[1], + p2, + p3, + p4, + p5, + self.second_ROIAlign_pooled_size, + self.second_ROIAlign_sampling_ratio, + self.second_ROIAlign_type, + self.second_NMS_max_proposals, + "mask_pooler", + ) # Reshape mask pooler output. 
- mask_pooler_shape = np.asarray([self.second_NMS_max_proposals*self.batch_size, self.fpn_out_channels, self.second_ROIAlign_pooled_size, self.second_ROIAlign_pooled_size], dtype=np.int64) - mask_pooler_reshape_node = self.graph.op_with_const("Reshape", "mask_pooler/reshape", mask_pooler_output, mask_pooler_shape) + mask_pooler_shape = np.asarray( + [ + self.second_NMS_max_proposals * self.batch_size, + self.fpn_out_channels, + self.second_ROIAlign_pooled_size, + self.second_ROIAlign_pooled_size, + ], + dtype=np.int64, + ) + mask_pooler_reshape_node = self.graph.op_with_const( + "Reshape", "mask_pooler/reshape", mask_pooler_output, mask_pooler_shape + ) # Get first Conv op in mask head and connect ROIAlign's squeezed output to it. - mask_head_conv = self.graph.find_node_by_op_name("Conv", "/roi_heads/mask_head/mask_fcn1/Conv") + mask_head_conv = self.graph.find_node_by_op_name( + "Conv", "/roi_heads/mask_head/mask_fcn1/Conv" + ) mask_head_conv.inputs[0] = mask_pooler_reshape_node[0] # Reshape node that is preparing 2nd NMS class outputs for Add node that comes next. - classes_reshape_shape = np.asarray([self.second_NMS_max_proposals*self.batch_size], dtype=np.int64) - classes_reshape_node = self.graph.op_with_const("Reshape", "box_outputs/reshape_classes", nms_outputs[3], classes_reshape_shape) + classes_reshape_shape = np.asarray( + [self.second_NMS_max_proposals * self.batch_size], dtype=np.int64 + ) + classes_reshape_node = self.graph.op_with_const( + "Reshape", + "box_outputs/reshape_classes", + nms_outputs[3], + classes_reshape_shape, + ) # This loop will generate an array used in Add node, which eventually will help Gather node to pick the single # class of interest per bounding box, instead of creating 80 masks for every single bounding box. 
add_array = [] - for i in range(self.second_NMS_max_proposals*self.batch_size): + for i in range(self.second_NMS_max_proposals * self.batch_size): if i == 0: start_pos = 0 else: @@ -488,23 +739,59 @@ def roi_heads(rpn_outputs, p2, p3, p4, p5, second_nms_threshold): # This Add node is one of the Gather node inputs, Gather node performs gather on 0th axis of data tensor # and requires indices that set tensors to be withing bounds, this Add node provides the bounds for Gather. add_array = np.asarray(add_array, dtype=np.int32) - classes_add_node = self.graph.op_with_const("Add", "box_outputs/add", classes_reshape_node[0], add_array) + classes_add_node = self.graph.op_with_const( + "Add", "box_outputs/add", classes_reshape_node[0], add_array + ) # Get the last Conv op in mask head and reshape it to correctly gather class of interest's masks. - last_conv = self.graph.find_node_by_op_name("Conv", "/roi_heads/mask_head/predictor/Conv") - last_conv_reshape_shape = np.asarray([self.second_NMS_max_proposals*self.num_classes*self.batch_size, self.mask_out_res, self.mask_out_res], dtype=np.int64) - last_conv_reshape_node = self.graph.op_with_const("Reshape", "mask_head/reshape_all_masks", last_conv.outputs[0], last_conv_reshape_shape) + last_conv = self.graph.find_node_by_op_name( + "Conv", "/roi_heads/mask_head/predictor/Conv" + ) + last_conv_reshape_shape = np.asarray( + [ + self.second_NMS_max_proposals * self.num_classes * self.batch_size, + self.mask_out_res, + self.mask_out_res, + ], + dtype=np.int64, + ) + last_conv_reshape_node = self.graph.op_with_const( + "Reshape", + "mask_head/reshape_all_masks", + last_conv.outputs[0], + last_conv_reshape_shape, + ) # Gather node that selects only masks belonging to detected class, 79 other masks are discarded. 
- final_gather = self.graph.gather("mask_head/final_gather", last_conv_reshape_node[0], classes_add_node[0], 0) + final_gather = self.graph.gather( + "mask_head/final_gather", + last_conv_reshape_node[0], + classes_add_node[0], + 0, + ) # Get last Sigmoid node and connect Gather node to it. - mask_head_sigmoid = self.graph.find_node_by_op_name("Sigmoid", "/roi_heads/mask_head/Sigmoid") + mask_head_sigmoid = self.graph.find_node_by_op_name( + "Sigmoid", "/roi_heads/mask_head/Sigmoid" + ) mask_head_sigmoid.inputs[0] = final_gather[0] # Final Reshape node, reshapes output of Sigmoid, important for various batch_size support (not tested yet). - final_graph_reshape_shape = np.asarray([self.batch_size, self.second_NMS_max_proposals, self.mask_out_res, self.mask_out_res], dtype=np.int64) - final_graph_reshape_node = self.graph.op_with_const("Reshape", "mask_head/final_reshape", mask_head_sigmoid.outputs[0], final_graph_reshape_shape) + final_graph_reshape_shape = np.asarray( + [ + self.batch_size, + self.second_NMS_max_proposals, + self.mask_out_res, + self.mask_out_res, + ], + dtype=np.int64, + ) + final_graph_reshape_node = self.graph.op_with_const( + "Reshape", + "mask_head/final_reshape", + mask_head_sigmoid.outputs[0], + final_graph_reshape_shape, + ) final_graph_reshape_node[0].dtype = np.float32 final_graph_reshape_node[0].name = "detection_masks" @@ -513,7 +800,9 @@ def roi_heads(rpn_outputs, p2, p3, p4, p5, second_nms_threshold): # Only Detectron 2's Mask-RCNN R50-FPN 3x is supported currently. p2, p3, p4, p5 = backbone() rpn_outputs = proposal_generator(anchors, first_nms_threshold) - box_head_outputs, mask_head_output = roi_heads(rpn_outputs, p2, p3, p4, p5, second_nms_threshold) + box_head_outputs, mask_head_output = roi_heads( + rpn_outputs, p2, p3, p4, p5, second_nms_threshold + ) # Append segmentation head output. box_head_outputs.append(mask_head_output) # Set graph outputs, both bbox and segmentation heads. 
@@ -531,17 +820,55 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("-i", "--exported_onnx", help="The exported to ONNX Detectron 2 Mask R-CNN", type=str) - parser.add_argument("-o", "--onnx", help="The output ONNX model file to write", type=str) - parser.add_argument("-c", "--det2_config", help="The Detectron 2 config file (.yaml) for the model", type=str) - parser.add_argument("-w", "--det2_weights", help="The Detectron 2 model weights (.pkl)", type=str) - parser.add_argument("-s", "--sample_image", help="Sample image for anchors generation", type=str) - parser.add_argument("-b", "--batch_size", help="Batch size for the model", type=int, default=1) - parser.add_argument("-t1", "--first_nms_threshold", help="Override the score threshold for the 1st NMS operation", type=float) - parser.add_argument("-t2", "--second_nms_threshold", help="Override the score threshold for the 2nd NMS operation", type=float) + parser.add_argument( + "-i", + "--exported_onnx", + help="The exported to ONNX Detectron 2 Mask R-CNN", + type=str, + ) + parser.add_argument( + "-o", "--onnx", help="The output ONNX model file to write", type=str + ) + parser.add_argument( + "-c", + "--det2_config", + help="The Detectron 2 config file (.yaml) for the model", + type=str, + ) + parser.add_argument( + "-w", "--det2_weights", help="The Detectron 2 model weights (.pkl)", type=str + ) + parser.add_argument( + "-s", "--sample_image", help="Sample image for anchors generation", type=str + ) + parser.add_argument( + "-b", "--batch_size", help="Batch size for the model", type=int, default=1 + ) + parser.add_argument( + "-t1", + "--first_nms_threshold", + help="Override the score threshold for the 1st NMS operation", + type=float, + ) + parser.add_argument( + "-t2", + "--second_nms_threshold", + help="Override the score threshold for the 2nd NMS operation", + type=float, + ) args = parser.parse_args() - if not all([args.exported_onnx, args.onnx, 
args.det2_config, args.det2_weights, args.sample_image]): + if not all( + [ + args.exported_onnx, + args.onnx, + args.det2_config, + args.det2_weights, + args.sample_image, + ] + ): parser.print_help() - print("\nThese arguments are required: --exported_onnx --onnx --det2_config --det2_weights and --sample_image") + print( + "\nThese arguments are required: --exported_onnx --onnx --det2_config --det2_weights and --sample_image" + ) sys.exit(1) main(args) diff --git a/samples/python/detectron2/eval_coco.py b/samples/python/detectron2/eval_coco.py index 828413d4..7afb6116 100644 --- a/samples/python/detectron2/eval_coco.py +++ b/samples/python/detectron2/eval_coco.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,9 +31,12 @@ from detectron2.structures import Instances, Boxes, ROIMasks except ImportError: print("Could not import Detectron 2 modules. Maybe you did not install Detectron 2") - print("Please install Detectron 2, check https://github.com/facebookresearch/detectron2/blob/main/INSTALL.md") + print( + "Please install Detectron 2, check https://github.com/facebookresearch/detectron2/blob/main/INSTALL.md" + ) sys.exit(1) + def build_evaluator(dataset_name): """ Create evaluator for a COCO dataset. @@ -45,6 +48,7 @@ def build_evaluator(dataset_name): else: raise NotImplementedError("Evaluator type is not supported") + def setup(config_file, weights): """ Create config and perform basic setup. @@ -55,6 +59,7 @@ def setup(config_file, weights): cfg.freeze() return cfg + def main(args): # Set up Detectron 2 config and build evaluator. 
cfg = setup(args.det2_config, args.det2_weights) @@ -63,10 +68,15 @@ def main(args): evaluator.reset() trt_infer = TensorRTInfer(args.engine) - batcher = ImageBatcher(args.input, *trt_infer.input_spec(), config_file=args.det2_config) + batcher = ImageBatcher( + args.input, *trt_infer.input_spec(), config_file=args.det2_config + ) for batch, images, scales in batcher.get_batch(): - print("Processing Image {} / {}".format(batcher.image_index, batcher.num_images), end="\r") + print( + "Processing Image {} / {}".format(batcher.image_index, batcher.num_images), + end="\r", + ) detections = trt_infer.infer(batch, scales, args.nms_threshold) for i in range(len(images)): # Get inference image resolution. @@ -85,13 +95,13 @@ def main(args): for n in range(num_instances): det = detections[i][n] # Append box coordinates data. - pred_boxes.append([det['ymin'], det['xmin'], det['ymax'], det['xmax']]) + pred_boxes.append([det["ymin"], det["xmin"], det["ymax"], det["xmax"]]) # Append score. - scores.append(det['score']) + scores.append(det["score"]) # Append class. - pred_classes.append(det['class']) + pred_classes.append(det["class"]) # Append mask. - pred_masks[n] = det['mask'] + pred_masks[n] = det["mask"] # Create new Instances object required for Detectron 2 evalutions and add: # boxes, scores, pred_classes, pred_masks. image_shape = (im_height, im_width) @@ -100,10 +110,12 @@ def main(args): instances.scores = torch.tensor(scores) instances.pred_classes = torch.tensor(pred_classes) roi_masks = ROIMasks(torch.tensor(pred_masks)) - instances.pred_masks = roi_masks.to_bitmasks(instances.pred_boxes, im_height, im_width, args.iou_threshold).tensor + instances.pred_masks = roi_masks.to_bitmasks( + instances.pred_boxes, im_height, im_width, args.iou_threshold + ).tensor # Process evaluations per image. 
- image_dict = [{'instances': instances}] - input_dict = [{'image_id': source_id}] + image_dict = [{"instances": instances}] + input_dict = [{"image_id": source_id}] evaluator.process(input_dict, image_dict) # Final evaluations, generation of mAP accuracy performance. @@ -113,17 +125,37 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with.") - parser.add_argument("-i", "--input", - help="The input to infer, either a single image path, or a directory of images.") - parser.add_argument("-c", "--det2_config", help="The Detectron 2 config file (.yaml) for the model", type=str) - parser.add_argument("-w", "--det2_weights", help="The Detectron 2 model weights (.pkl)", type=str) - parser.add_argument("-t", "--nms_threshold", type=float, - help="Override the score threshold for the NMS operation, if higher than the threshold in the engine.") - parser.add_argument("--iou_threshold", default=0.5, type=float, - help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. Pixel values more than threshold will become 1, less 0.") + parser.add_argument( + "-i", + "--input", + help="The input to infer, either a single image path, or a directory of images.", + ) + parser.add_argument( + "-c", + "--det2_config", + help="The Detectron 2 config file (.yaml) for the model", + type=str, + ) + parser.add_argument( + "-w", "--det2_weights", help="The Detectron 2 model weights (.pkl)", type=str + ) + parser.add_argument( + "-t", + "--nms_threshold", + type=float, + help="Override the score threshold for the NMS operation, if higher than the threshold in the engine.", + ) + parser.add_argument( + "--iou_threshold", + default=0.5, + type=float, + help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. 
Pixel values more than threshold will become 1, less 0.", + ) args = parser.parse_args() if not all([args.engine, args.input, args.det2_config, args.det2_weights]): parser.print_help() - print("\nThese arguments are required: --engine --input --det2_config and --det2_weights") + print( + "\nThese arguments are required: --engine --input --det2_config and --det2_weights" + ) sys.exit(1) main(args) diff --git a/samples/python/detectron2/image_batcher.py b/samples/python/detectron2/image_batcher.py index 228798ad..0fb1d90a 100644 --- a/samples/python/detectron2/image_batcher.py +++ b/samples/python/detectron2/image_batcher.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,15 +24,26 @@ from detectron2.config import get_cfg except ImportError: print("Could not import Detectron 2 modules. Maybe you did not install Detectron 2") - print("Please install Detectron 2, check https://github.com/facebookresearch/detectron2/blob/main/INSTALL.md") + print( + "Please install Detectron 2, check https://github.com/facebookresearch/detectron2/blob/main/INSTALL.md" + ) sys.exit(1) + class ImageBatcher: """ Creates batches of pre-processed images. """ - def __init__(self, input, shape, dtype, max_num_images=None, exact_batches=False, config_file=None): + def __init__( + self, + input, + shape, + dtype, + max_num_images=None, + exact_batches=False, + config_file=None, + ): """ :param input: The input directory to read images from. :param shape: The tensor shape of the batch to prepare, either in NCHW or NHWC format. 
@@ -68,10 +79,16 @@ def det2_setup(config_file): extensions = [".jpg", ".jpeg", ".png", ".bmp", ".ppm"] def is_image(path): - return os.path.isfile(path) and os.path.splitext(path)[1].lower() in extensions + return ( + os.path.isfile(path) and os.path.splitext(path)[1].lower() in extensions + ) if os.path.isdir(input): - self.images = [os.path.join(input, f) for f in os.listdir(input) if is_image(os.path.join(input, f))] + self.images = [ + os.path.join(input, f) + for f in os.listdir(input) + if is_image(os.path.join(input, f)) + ] self.images.sort() elif os.path.isfile(input): if is_image(input): @@ -108,7 +125,7 @@ def is_image(path): if self.num_images < 1: print("Not enough images to create batches") sys.exit(1) - self.images = self.images[0:self.num_images] + self.images = self.images[0 : self.num_images] # Subdivide the list of images into batches. self.num_batches = 1 + int((self.num_images - 1) / self.batch_size) @@ -122,7 +139,6 @@ def is_image(path): self.image_index = 0 self.batch_index = 0 - def preprocess_image(self, image_path): """ The image preprocessor loads an image from disk and prepares it as needed for batching. This includes padding, @@ -165,7 +181,7 @@ def resize_pad(image, pad_color=(0, 0, 0)): newh = int(newh + 0.5) # Scaling factor for normalized box coordinates scaling in post-processing. - scaling = max(newh/height, neww/width) + scaling = max(newh / height, neww / width) # Padding. image = image.resize((neww, newh), resample=Image.BILINEAR) @@ -176,7 +192,7 @@ def resize_pad(image, pad_color=(0, 0, 0)): scale = None image = Image.open(image_path) - image = image.convert(mode='RGB') + image = image.convert(mode="RGB") # Pad with mean values of COCO dataset, since padding is applied before actual model's # preprocessor steps (Sub, Div ops), we need to pad with mean values in order to reverse # the effects of Sub and Div, so that padding after model's preprocessor will be with actual 0s. 
@@ -185,7 +201,7 @@ def resize_pad(image, pad_color=(0, 0, 0)): # Change HWC -> CHW. image = np.transpose(image, (2, 0, 1)) # Change RGB -> BGR. - return image[[2,1,0]], scale + return image[[2, 1, 0]], scale def get_batch(self): """ diff --git a/samples/python/detectron2/infer.py b/samples/python/detectron2/infer.py index db7c83b6..d086fb76 100644 --- a/samples/python/detectron2/infer.py +++ b/samples/python/detectron2/infer.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,6 +27,7 @@ sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) import common + class TensorRTInfer: """ Implements inference for the Model TensorRT engine. @@ -65,12 +66,12 @@ def __init__(self, engine_path): size *= s allocation = common.cuda_call(cudart.cudaMalloc(size)) binding = { - 'index': i, - 'name': name, - 'dtype': np.dtype(trt.nptype(dtype)), - 'shape': list(shape), - 'allocation': allocation, - 'size': size + "index": i, + "name": name, + "dtype": np.dtype(trt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "size": size, } self.allocations.append(allocation) if is_input: @@ -88,7 +89,7 @@ def input_spec(self): Get the specs for the input tensor of the network. Useful to prepare memory allocations. :return: Two items, the shape of the input tensor and its (numpy) datatype. 
""" - return self.inputs[0]['shape'], self.inputs[0]['dtype'] + return self.inputs[0]["shape"], self.inputs[0]["dtype"] def output_spec(self): """ @@ -97,7 +98,7 @@ def output_spec(self): """ specs = [] for o in self.outputs: - specs.append((o['shape'], o['dtype'])) + specs.append((o["shape"], o["dtype"])) return specs def infer(self, batch, scales=None, nms_threshold=None): @@ -115,11 +116,13 @@ def infer(self, batch, scales=None, nms_threshold=None): outputs.append(np.zeros(shape, dtype)) # Process I/O and execute the network. - common.memcpy_host_to_device(self.inputs[0]['allocation'], np.ascontiguousarray(batch)) + common.memcpy_host_to_device( + self.inputs[0]["allocation"], np.ascontiguousarray(batch) + ) self.context.execute_v2(self.allocations) for o in range(len(outputs)): - common.memcpy_device_to_host(outputs[o], self.outputs[o]['allocation']) + common.memcpy_device_to_host(outputs[o], self.outputs[o]["allocation"]) # Process the results. nums = outputs[0] @@ -136,7 +139,7 @@ def infer(self, batch, scales=None, nms_threshold=None): mask = masks[i][n] # Calculate scaling values for bboxes. 
- scale = self.inputs[0]['shape'][2] + scale = self.inputs[0]["shape"][2] scale /= scales[i] scale_y = scale scale_x = scale @@ -144,15 +147,17 @@ def infer(self, batch, scales=None, nms_threshold=None): if nms_threshold and scores[i][n] < nms_threshold: continue # Append to detections - detections[i].append({ - 'ymin': boxes[i][n][0] * scale_y, - 'xmin': boxes[i][n][1] * scale_x, - 'ymax': boxes[i][n][2] * scale_y, - 'xmax': boxes[i][n][3] * scale_x, - 'score': scores[i][n], - 'class': int(pred_classes[i][n]), - 'mask': mask, - }) + detections[i].append( + { + "ymin": boxes[i][n][0] * scale_y, + "xmin": boxes[i][n][1] * scale_x, + "ymax": boxes[i][n][2] * scale_y, + "xmax": boxes[i][n][3] * scale_x, + "score": scores[i][n], + "class": int(pred_classes[i][n]), + "mask": mask, + } + ) return detections @@ -160,22 +165,117 @@ def main(args): output_dir = os.path.realpath(args.output) os.makedirs(output_dir, exist_ok=True) - labels = ["person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier", "toothbrush"] + labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + 
"elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", + ] trt_infer = TensorRTInfer(args.engine) - batcher = ImageBatcher(args.input, *trt_infer.input_spec(), config_file=args.det2_config) + batcher = ImageBatcher( + args.input, *trt_infer.input_spec(), config_file=args.det2_config + ) for batch, images, scales in batcher.get_batch(): - print("Processing Image {} / {}".format(batcher.image_index, batcher.num_images), end="\r") + print( + "Processing Image {} / {}".format(batcher.image_index, batcher.num_images), + end="\r", + ) detections = trt_infer.infer(batch, scales, args.nms_threshold) for i in range(len(images)): basename = os.path.splitext(os.path.basename(images[i]))[0] # Image Visualizations output_path = os.path.join(output_dir, "{}.png".format(basename)) - visualize_detections(images[i], output_path, detections[i], labels, args.iou_threshold) + visualize_detections( + images[i], output_path, detections[i], labels, args.iou_threshold + ) # Text Results output_results = "" for d in detections[i]: - line = [d['xmin'], d['ymin'], d['xmax'], d['ymax'], d['score'], d['class']] + line = [ + d["xmin"], + d["ymin"], + d["xmax"], + d["ymax"], + d["score"], + d["class"], + ] output_results += "\t".join([str(f) for f in line]) + "\n" with 
open(os.path.join(args.output, "{}.txt".format(basename)), "w") as f: f.write(output_results) @@ -185,17 +285,41 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("-e", "--engine", default=None, help="The serialized TensorRT engine") - parser.add_argument("-i", "--input", default=None, help="Path to the image or directory to process") - parser.add_argument("-c", "--det2_config", help="The Detectron 2 config file (.yaml) for the model", type=str) - parser.add_argument("-o", "--output", default=None, help="Directory where to save the visualization results") - parser.add_argument("-t", "--nms_threshold", type=float, - help="Override the score threshold for the NMS operation, if higher than the threshold in the engine.") - parser.add_argument("--iou_threshold", default=0.5, type=float, - help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. Pixel values more than threshold will become 1, less 0") + parser.add_argument( + "-e", "--engine", default=None, help="The serialized TensorRT engine" + ) + parser.add_argument( + "-i", "--input", default=None, help="Path to the image or directory to process" + ) + parser.add_argument( + "-c", + "--det2_config", + help="The Detectron 2 config file (.yaml) for the model", + type=str, + ) + parser.add_argument( + "-o", + "--output", + default=None, + help="Directory where to save the visualization results", + ) + parser.add_argument( + "-t", + "--nms_threshold", + type=float, + help="Override the score threshold for the NMS operation, if higher than the threshold in the engine.", + ) + parser.add_argument( + "--iou_threshold", + default=0.5, + type=float, + help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. 
Pixel values more than threshold will become 1, less 0", + ) args = parser.parse_args() if not all([args.engine, args.input, args.output, args.det2_config]): parser.print_help() - print("\nThese arguments are required: --engine --input --output and --det2_config") + print( + "\nThese arguments are required: --engine --input --output and --det2_config" + ) sys.exit(1) main(args) diff --git a/samples/python/detectron2/onnx_utils.py b/samples/python/detectron2/onnx_utils.py index 56d280fa..2144fea0 100644 --- a/samples/python/detectron2/onnx_utils.py +++ b/samples/python/detectron2/onnx_utils.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,6 +23,7 @@ logging.getLogger("ModelHelper").setLevel(logging.INFO) log = logging.getLogger("ModelHelper") + @gs.Graph.register() def op_with_const(self, op, name, input, value): """ @@ -35,7 +36,10 @@ def op_with_const(self, op, name, input, value): input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created {} node '{}': {}".format(op, name, value.squeeze())) const = gs.Constant(name="{}_value:0".format(name), values=value) - return self.layer(name=name, op=op, inputs=[input_tensor, const], outputs=[name + ":0"]) + return self.layer( + name=name, op=op, inputs=[input_tensor, const], outputs=[name + ":0"] + ) + @gs.Graph.register() def matmul(self, name, input, value): @@ -48,7 +52,10 @@ def matmul(self, name, input, value): input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created {} node '{}': {}".format("MatMul", name, value.squeeze())) const = gs.Constant(name="{}_value:0".format(name), values=value) - return self.layer(name=name, op="MatMul", inputs=[input_tensor, const], outputs=[name + 
":0"]) + return self.layer( + name=name, op="MatMul", inputs=[input_tensor, const], outputs=[name + ":0"] + ) + @gs.Graph.register() def clip(self, name, input, clip_min, clip_max): @@ -61,9 +68,19 @@ def clip(self, name, input, clip_min, clip_max): """ input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created {} node '{}".format("Clip", name)) - const_min = gs.Constant(name="{}_value:0".format(name), values=np.asarray([clip_min], dtype=np.float32)) - const_max = gs.Constant(name="{}_value:1".format(name), values=np.asarray([clip_max], dtype=np.float32)) - return self.layer(name=name, op="Clip", inputs=[input_tensor, const_min, const_max], outputs=[name + ":0"]) + const_min = gs.Constant( + name="{}_value:0".format(name), values=np.asarray([clip_min], dtype=np.float32) + ) + const_max = gs.Constant( + name="{}_value:1".format(name), values=np.asarray([clip_max], dtype=np.float32) + ) + return self.layer( + name=name, + op="Clip", + inputs=[input_tensor, const_min, const_max], + outputs=[name + ":0"], + ) + @gs.Graph.register() def slice(self, name, input, starts, ends, axes): @@ -79,10 +96,22 @@ def slice(self, name, input, starts, ends, axes): input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created {} node '{}".format("Slice", name)) - const_start = gs.Constant(name="{}_value:0".format(name), values=np.asarray([starts], dtype=np.int64)) - const_end = gs.Constant(name="{}_value:1".format(name), values=np.asarray([ends], dtype=np.int64)) - const_axes = gs.Constant(name="{}_value:2".format(name), values=np.asarray([axes], dtype=np.int64)) - return self.layer(name=name, op="Slice", inputs=[input_tensor, const_start, const_end, const_axes], outputs=[name + ":0"]) + const_start = gs.Constant( + name="{}_value:0".format(name), values=np.asarray([starts], dtype=np.int64) + ) + const_end = gs.Constant( + name="{}_value:1".format(name), values=np.asarray([ends], dtype=np.int64) + ) + const_axes = gs.Constant( + 
name="{}_value:2".format(name), values=np.asarray([axes], dtype=np.int64) + ) + return self.layer( + name=name, + op="Slice", + inputs=[input_tensor, const_start, const_end, const_axes], + outputs=[name + ":0"], + ) + @gs.Graph.register() def unsqueeze(self, name, input, axes=[3]): @@ -96,7 +125,14 @@ def unsqueeze(self, name, input, axes=[3]): """ input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created Unsqueeze node '{}': {}".format(name, axes)) - return self.layer(name=name, op="Unsqueeze", inputs=[input_tensor], outputs=[name + ":0"], attrs={'axes': axes}) + return self.layer( + name=name, + op="Unsqueeze", + inputs=[input_tensor], + outputs=[name + ":0"], + attrs={"axes": axes}, + ) + @gs.Graph.register() def squeeze(self, name, input, axes=[2]): @@ -110,7 +146,14 @@ def squeeze(self, name, input, axes=[2]): """ input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created Squeeze node '{}': {}".format(name, axes)) - return self.layer(name=name, op="Squeeze", inputs=[input_tensor], outputs=[name + ":0"], attrs={'axes': axes}) + return self.layer( + name=name, + op="Squeeze", + inputs=[input_tensor], + outputs=[name + ":0"], + attrs={"axes": axes}, + ) + @gs.Graph.register() def gather(self, name, data, indices, axes=0): @@ -125,7 +168,14 @@ def gather(self, name, data, indices, axes=0): data_tensor = data if type(data) is gs.Variable else data[0] indices_tensor = indices if type(indices) is gs.Variable else indices[0] log.debug("Created Gather node '{}': {}".format(name, axes)) - return self.layer(name=name, op="Gather", inputs=[data_tensor, indices_tensor], outputs=[name + ":0"], attrs={'axes': axes}) + return self.layer( + name=name, + op="Gather", + inputs=[data_tensor, indices_tensor], + outputs=[name + ":0"], + attrs={"axes": axes}, + ) + @gs.Graph.register() def transpose(self, name, input, perm): @@ -139,7 +189,14 @@ def transpose(self, name, input, perm): """ input_tensor = input if type(input) is 
gs.Variable else input[0] log.debug("Created Transpose node '{}': {}".format(name, perm)) - return self.layer(name=name, op="Transpose", inputs=[input_tensor], outputs=[name + ":0"], attrs={'perm': perm}) + return self.layer( + name=name, + op="Transpose", + inputs=[input_tensor], + outputs=[name + ":0"], + attrs={"perm": perm}, + ) + @gs.Graph.register() def sigmoid(self, name, input): @@ -152,7 +209,10 @@ def sigmoid(self, name, input): """ input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created Sigmoid node '{}'".format(name)) - return self.layer(name=name, op="Sigmoid", inputs=[input_tensor], outputs=[name + ":0"]) + return self.layer( + name=name, op="Sigmoid", inputs=[input_tensor], outputs=[name + ":0"] + ) + @gs.Graph.register() def plugin(self, op, name, inputs: list, outputs: list, attrs): @@ -170,6 +230,7 @@ def plugin(self, op, name, inputs: list, outputs: list, attrs): log.debug("Created TRT Plugin node '{}': {}".format(name, attrs)) return self.layer(op=op, name=name, inputs=inputs, outputs=outputs, attrs=attrs) + @gs.Graph.register() def find_node_by_op(self, op): """ @@ -183,6 +244,7 @@ def find_node_by_op(self, op): return node return None + @gs.Graph.register() def find_node_by_op_name(self, op, name): """ @@ -197,8 +259,11 @@ def find_node_by_op_name(self, op, name): return node return None + @gs.Graph.register() -def find_node_by_op_input_output_name(self, op, input_name, output_name, input_pos=0, output_pos=0): +def find_node_by_op_input_output_name( + self, op, input_name, output_name, input_pos=0, output_pos=0 +): """ Finds the first node in the graph with the given operation name. :param self: The gs.Graph object being extended. @@ -210,10 +275,15 @@ def find_node_by_op_input_output_name(self, op, input_name, output_name, input_p :return: The first node matching that performs that op. 
""" for node in self.nodes: - if node.op == op and node.inputs[input_pos].name == input_name and node.outputs[output_pos].name == output_name: + if ( + node.op == op + and node.inputs[input_pos].name == input_name + and node.outputs[output_pos].name == output_name + ): return node return None + @gs.Graph.register() def find_descendant_by_op(self, node, op, depth=10): """ @@ -237,6 +307,7 @@ def find_descendant_by_op(self, node, op, depth=10): queue.append(child) return None + @gs.Graph.register() def find_ancestor_by_op(self, node, op, depth=10): """ diff --git a/samples/python/detectron2/visualize.py b/samples/python/detectron2/visualize.py index dd8b6ead..00e930f1 100644 --- a/samples/python/detectron2/visualize.py +++ b/samples/python/detectron2/visualize.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,55 +22,177 @@ import PIL.ImageFilter as ImageFilter -COLORS = ['GoldenRod', 'MediumTurquoise', 'GreenYellow', 'SteelBlue', 'DarkSeaGreen', 'SeaShell', 'LightGrey', - 'IndianRed', 'DarkKhaki', 'LawnGreen', 'WhiteSmoke', 'Peru', 'LightCoral', 'FireBrick', 'OldLace', - 'LightBlue', 'SlateGray', 'OliveDrab', 'NavajoWhite', 'PaleVioletRed', 'SpringGreen', 'AliceBlue', 'Violet', - 'DeepSkyBlue', 'Red', 'MediumVioletRed', 'PaleTurquoise', 'Tomato', 'Azure', 'Yellow', 'Cornsilk', - 'Aquamarine', 'CadetBlue', 'CornflowerBlue', 'DodgerBlue', 'Olive', 'Orchid', 'LemonChiffon', 'Sienna', - 'OrangeRed', 'Orange', 'DarkSalmon', 'Magenta', 'Wheat', 'Lime', 'GhostWhite', 'SlateBlue', 'Aqua', - 'MediumAquaMarine', 'LightSlateGrey', 'MediumSeaGreen', 'SandyBrown', 'YellowGreen', 'Plum', 'FloralWhite', - 'LightPink', 'Thistle', 'DarkViolet', 'Pink', 'Crimson', 'Chocolate', 'DarkGrey', 
'Ivory', 'PaleGreen', - 'DarkGoldenRod', 'LavenderBlush', 'SlateGrey', 'DeepPink', 'Gold', 'Cyan', 'LightSteelBlue', 'MediumPurple', - 'ForestGreen', 'DarkOrange', 'Tan', 'Salmon', 'PaleGoldenRod', 'LightGreen', 'LightSlateGray', 'HoneyDew', - 'Fuchsia', 'LightSeaGreen', 'DarkOrchid', 'Green', 'Chartreuse', 'LimeGreen', 'AntiqueWhite', 'Beige', - 'Gainsboro', 'Bisque', 'SaddleBrown', 'Silver', 'Lavender', 'Teal', 'LightCyan', 'PapayaWhip', 'Purple', - 'Coral', 'BurlyWood', 'LightGray', 'Snow', 'MistyRose', 'PowderBlue', 'DarkCyan', 'White', 'Turquoise', - 'MediumSlateBlue', 'PeachPuff', 'Moccasin', 'LightSalmon', 'SkyBlue', 'Khaki', 'MediumSpringGreen', - 'BlueViolet', 'MintCream', 'Linen', 'SeaGreen', 'HotPink', 'LightYellow', 'BlanchedAlmond', 'RoyalBlue', - 'RosyBrown', 'MediumOrchid', 'DarkTurquoise', 'LightGoldenRodYellow', 'LightSkyBlue'] +COLORS = [ + "GoldenRod", + "MediumTurquoise", + "GreenYellow", + "SteelBlue", + "DarkSeaGreen", + "SeaShell", + "LightGrey", + "IndianRed", + "DarkKhaki", + "LawnGreen", + "WhiteSmoke", + "Peru", + "LightCoral", + "FireBrick", + "OldLace", + "LightBlue", + "SlateGray", + "OliveDrab", + "NavajoWhite", + "PaleVioletRed", + "SpringGreen", + "AliceBlue", + "Violet", + "DeepSkyBlue", + "Red", + "MediumVioletRed", + "PaleTurquoise", + "Tomato", + "Azure", + "Yellow", + "Cornsilk", + "Aquamarine", + "CadetBlue", + "CornflowerBlue", + "DodgerBlue", + "Olive", + "Orchid", + "LemonChiffon", + "Sienna", + "OrangeRed", + "Orange", + "DarkSalmon", + "Magenta", + "Wheat", + "Lime", + "GhostWhite", + "SlateBlue", + "Aqua", + "MediumAquaMarine", + "LightSlateGrey", + "MediumSeaGreen", + "SandyBrown", + "YellowGreen", + "Plum", + "FloralWhite", + "LightPink", + "Thistle", + "DarkViolet", + "Pink", + "Crimson", + "Chocolate", + "DarkGrey", + "Ivory", + "PaleGreen", + "DarkGoldenRod", + "LavenderBlush", + "SlateGrey", + "DeepPink", + "Gold", + "Cyan", + "LightSteelBlue", + "MediumPurple", + "ForestGreen", + "DarkOrange", + "Tan", + "Salmon", 
+ "PaleGoldenRod", + "LightGreen", + "LightSlateGray", + "HoneyDew", + "Fuchsia", + "LightSeaGreen", + "DarkOrchid", + "Green", + "Chartreuse", + "LimeGreen", + "AntiqueWhite", + "Beige", + "Gainsboro", + "Bisque", + "SaddleBrown", + "Silver", + "Lavender", + "Teal", + "LightCyan", + "PapayaWhip", + "Purple", + "Coral", + "BurlyWood", + "LightGray", + "Snow", + "MistyRose", + "PowderBlue", + "DarkCyan", + "White", + "Turquoise", + "MediumSlateBlue", + "PeachPuff", + "Moccasin", + "LightSalmon", + "SkyBlue", + "Khaki", + "MediumSpringGreen", + "BlueViolet", + "MintCream", + "Linen", + "SeaGreen", + "HotPink", + "LightYellow", + "BlanchedAlmond", + "RoyalBlue", + "RosyBrown", + "MediumOrchid", + "DarkTurquoise", + "LightGoldenRodYellow", + "LightSkyBlue", +] -#Overlay mask with transparency on top of the image. +# Overlay mask with transparency on top of the image. def overlay(image, mask, color, alpha_transparency=0.5): for channel in range(3): - image[:, :, channel] = np.where(mask == 1, - image[:, :, channel] * - (1 - alpha_transparency) + alpha_transparency * color[channel] * 255, - image[:, :, channel]) + image[:, :, channel] = np.where( + mask == 1, + image[:, :, channel] * (1 - alpha_transparency) + + alpha_transparency * color[channel] * 255, + image[:, :, channel], + ) return image -def visualize_detections(image_path, output_path, detections, labels=[], iou_threshold=0.5): - image = Image.open(image_path).convert(mode='RGB') + +def visualize_detections( + image_path, output_path, detections, labels=[], iou_threshold=0.5 +): + image = Image.open(image_path).convert(mode="RGB") # Get image dimensions. im_width, im_height = image.size line_width = 2 font = ImageFont.load_default() for d in detections: - color = COLORS[d['class'] % len(COLORS)] + color = COLORS[d["class"] % len(COLORS)] # Dynamically convert PIL color into RGB numpy array. - pixel_color = Image.new("RGB",(1, 1), color) + pixel_color = Image.new("RGB", (1, 1), color) # Normalize. 
- np_color = (np.asarray(pixel_color)[0][0])/255 + np_color = (np.asarray(pixel_color)[0][0]) / 255 # TRT instance segmentation masks. - if isinstance(d['mask'], np.ndarray) and d['mask'].shape == (28, 28): + if isinstance(d["mask"], np.ndarray) and d["mask"].shape == (28, 28): # PyTorch uses [x1,y1,x2,y2] format instead of regular [y1,x1,y2,x2]. - d['ymin'], d['xmin'], d['ymax'], d['xmax'] = d['xmin'], d['ymin'], d['xmax'], d['ymax'] + d["ymin"], d["xmin"], d["ymax"], d["xmax"] = ( + d["xmin"], + d["ymin"], + d["xmax"], + d["ymax"], + ) # Get detection bbox resolution. - det_width = round(d['xmax'] - d['xmin']) - det_height = round(d['ymax'] - d['ymin']) + det_width = round(d["xmax"] - d["xmin"]) + det_height = round(d["ymax"] - d["ymin"]) # Slight scaling, to get binary masks after float32 -> uint8 # conversion, if not scaled all pixels are zero. - mask = d['mask'] > iou_threshold + mask = d["mask"] > iou_threshold # Convert float32 -> uint8. mask = mask.astype(np.uint8) # Create an image out of predicted mask array. @@ -80,10 +202,10 @@ def visualize_detections(image_path, output_path, detections, labels=[], iou_thr # Create an original image sized template for correct mask placement. pad = Image.new("L", (im_width, im_height)) # Place your mask according to detection bbox placement. - pad.paste(mask, (round(d['xmin']), (round(d['ymin'])))) + pad.paste(mask, (round(d["xmin"]), (round(d["ymin"])))) # Reconvert mask into numpy array for evaluation. padded_mask = np.array(pad) - #Creat np.array from original image, copy in order to modify. + # Creat np.array from original image, copy in order to modify. image_copy = np.asarray(image).copy() # Image with overlaid mask. masked_image = overlay(image_copy, padded_mask, np_color) @@ -92,23 +214,42 @@ def visualize_detections(image_path, output_path, detections, labels=[], iou_thr # Bbox lines. 
draw = ImageDraw.Draw(image) - draw.line([(d['xmin'], d['ymin']), (d['xmin'], d['ymax']), (d['xmax'], d['ymax']), (d['xmax'], d['ymin']), - (d['xmin'], d['ymin'])], width=line_width, fill=color) - label = "Class {}".format(d['class']) - if d['class'] < len(labels): - label = "{}".format(labels[d['class']]) - score = d['score'] + draw.line( + [ + (d["xmin"], d["ymin"]), + (d["xmin"], d["ymax"]), + (d["xmax"], d["ymax"]), + (d["xmax"], d["ymin"]), + (d["xmin"], d["ymin"]), + ], + width=line_width, + fill=color, + ) + label = "Class {}".format(d["class"]) + if d["class"] < len(labels): + label = "{}".format(labels[d["class"]]) + score = d["score"] text = "{}: {}%".format(label, int(100 * score)) if score < 0: text = label left, top, right, bottom = font.getbbox(text) text_width, text_height = right - left, bottom - top - text_bottom = max(text_height, d['ymin']) - text_left = d['xmin'] + text_bottom = max(text_height, d["ymin"]) + text_left = d["xmin"] margin = np.ceil(0.05 * text_height) - draw.rectangle([(text_left, text_bottom - text_height - 2 * margin), (text_left + text_width, text_bottom)], - fill=color) - draw.text((text_left + margin, text_bottom - text_height - margin), text, fill='black', font=font) + draw.rectangle( + [ + (text_left, text_bottom - text_height - 2 * margin), + (text_left + text_width, text_bottom), + ], + fill=color, + ) + draw.text( + (text_left + margin, text_bottom - text_height - margin), + text, + fill="black", + font=font, + ) if output_path is None: return image image.save(output_path) diff --git a/samples/python/downloader.py b/samples/python/downloader.py index b4a436e2..5e8be202 100755 --- a/samples/python/downloader.py +++ b/samples/python/downloader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -87,8 +87,13 @@ def download(data_dir, yaml_path, overwrite=False): def _downloadFile(path, url): logger.info("Downloading %s from %s", path, url) import requests + from requests.adapters import HTTPAdapter, Retry + + session = requests.Session() + retries = Retry(total=5, backoff_factor=0.5) + session.mount("http://", HTTPAdapter(max_retries=retries)) + r = session.get(url, stream=True, timeout=10) - r = requests.get(url, stream=True, timeout=5) size = int(r.headers.get("content-length", 0)) from tqdm import tqdm @@ -124,7 +129,9 @@ def _downloadFile(path, url): def _parseArgs(): - parser = argparse.ArgumentParser(description="Downloader of TensorRT sample data files.") + parser = argparse.ArgumentParser( + description="Downloader of TensorRT sample data files." + ) parser.add_argument( "-d", "--data", @@ -137,7 +144,11 @@ def _parseArgs(): default="download.yml", ) parser.add_argument( - "-o", "--overwrite", help="Force to overwrite if MD5 check failed", action="store_true", default=False + "-o", + "--overwrite", + help="Force to overwrite if MD5 check failed", + action="store_true", + default=False, ) parser.add_argument( "-v", @@ -150,7 +161,9 @@ def _parseArgs(): args, _ = parser.parse_known_args() data = os.environ.get("TRT_DATA_DIR", None) if args.data is None else args.data if data is None: - raise ValueError("Data directory must be specified by either `-d $DATA` or environment variable $TRT_DATA_DIR.") + raise ValueError( + "Data directory must be specified by either `-d $DATA` or environment variable $TRT_DATA_DIR." 
+ ) return data, args @@ -209,16 +222,22 @@ def getFilePath(path): """ global TRT_DATA_DIR if not TRT_DATA_DIR: - parser = argparse.ArgumentParser(description="Helper of data file download tool") + parser = argparse.ArgumentParser( + description="Helper of data file download tool" + ) parser.add_argument( "-d", "--data", help="Specify the data directory where it is saved in. $TRT_DATA_DIR will be overwritten by this argument.", ) args, _ = parser.parse_known_args() - TRT_DATA_DIR = os.environ.get("TRT_DATA_DIR", None) if args.data is None else args.data + TRT_DATA_DIR = ( + os.environ.get("TRT_DATA_DIR", None) if args.data is None else args.data + ) if TRT_DATA_DIR is None: - raise ValueError("Data directory must be specified by either `-d $DATA` or environment variable $TRT_DATA_DIR.") + raise ValueError( + "Data directory must be specified by either `-d $DATA` or environment variable $TRT_DATA_DIR." + ) fullpath = os.path.join(TRT_DATA_DIR, path) if not os.path.exists(fullpath): diff --git a/samples/python/efficientdet/build_engine.py b/samples/python/efficientdet/build_engine.py index 77143aad..58dd6d5c 100644 --- a/samples/python/efficientdet/build_engine.py +++ b/samples/python/efficientdet/build_engine.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -56,7 +56,10 @@ def set_image_batcher(self, image_batcher: ImageBatcher): :param image_batcher: The ImageBatcher object """ self.image_batcher = image_batcher - size = int(np.dtype(self.image_batcher.dtype).itemsize * np.prod(self.image_batcher.shape)) + size = int( + np.dtype(self.image_batcher.dtype).itemsize + * np.prod(self.image_batcher.shape) + ) self.batch_allocation = common.cuda_call(cudart.cudaMalloc(size)) self.batch_generator = self.image_batcher.get_batch() @@ -81,8 +84,14 @@ def get_batch(self, names): return None try: batch, _, _ = next(self.batch_generator) - log.info("Calibrating image {} / {}".format(self.image_batcher.image_index, self.image_batcher.num_images)) - common.memcpy_host_to_device(self.batch_allocation, np.ascontiguousarray(batch)) + log.info( + "Calibrating image {} / {}".format( + self.image_batcher.image_index, self.image_batcher.num_images + ) + ) + common.memcpy_host_to_device( + self.batch_allocation, np.ascontiguousarray(batch) + ) return [int(self.batch_allocation)] except StopIteration: log.info("Finished calibration batches") @@ -130,7 +139,9 @@ def __init__(self, verbose=False, workspace=8): self.builder = trt.Builder(self.trt_logger) self.config = self.builder.create_builder_config() - self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace * (2 ** 30)) + self.config.set_memory_pool_limit( + trt.MemoryPoolType.WORKSPACE, workspace * (2**30) + ) self.network = None self.parser = None @@ -161,29 +172,46 @@ def create_network(self, onnx_path, batch_size, dynamic_batch_size=None): profile = self.builder.create_optimization_profile() dynamic_inputs = False for input in inputs: - log.info("Input '{}' with shape {} and dtype {}".format(input.name, input.shape, input.dtype)) + log.info( + "Input '{}' with shape {} and dtype {}".format( + input.name, input.shape, input.dtype + ) + ) if input.shape[0] == -1: 
dynamic_inputs = True if dynamic_batch_size: if type(dynamic_batch_size) is str: - dynamic_batch_size = [int(v) for v in dynamic_batch_size.split(",")] + dynamic_batch_size = [ + int(v) for v in dynamic_batch_size.split(",") + ] assert len(dynamic_batch_size) == 3 min_shape = [dynamic_batch_size[0]] + list(input.shape[1:]) opt_shape = [dynamic_batch_size[1]] + list(input.shape[1:]) max_shape = [dynamic_batch_size[2]] + list(input.shape[1:]) profile.set_shape(input.name, min_shape, opt_shape, max_shape) - log.info("Input '{}' Optimization Profile with shape MIN {} / OPT {} / MAX {}".format( - input.name, min_shape, opt_shape, max_shape)) + log.info( + "Input '{}' Optimization Profile with shape MIN {} / OPT {} / MAX {}".format( + input.name, min_shape, opt_shape, max_shape + ) + ) else: shape = [batch_size] + list(input.shape[1:]) profile.set_shape(input.name, shape, shape, shape) - log.info("Input '{}' Optimization Profile with shape {}".format(input.name, shape)) + log.info( + "Input '{}' Optimization Profile with shape {}".format( + input.name, shape + ) + ) if dynamic_inputs: self.config.add_optimization_profile(profile) outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)] for output in outputs: - log.info("Output '{}' with shape {} and dtype {}".format(output.name, output.shape, output.dtype)) + log.info( + "Output '{}' with shape {} and dtype {}".format( + output.name, output.shape, output.dtype + ) + ) def set_mixed_precision(self): """ @@ -202,7 +230,8 @@ def set_mixed_precision(self): # add or remove blocks. 
for i in range(self.network.num_layers): layer = self.network.get_layer(i) - if layer.type == trt.LayerType.CONVOLUTION and any([ + if layer.type == trt.LayerType.CONVOLUTION and any( + [ # AutoML Layer Names: "/stem/" in layer.name, "/blocks_0/" in layer.name, @@ -213,12 +242,24 @@ def set_mixed_precision(self): "/stack_0/block_0/" in layer.name, "/stack_1/block_0/" in layer.name, "/stack_1/block_1/" in layer.name, - ]): + ] + ): self.network.get_layer(i).precision = trt.DataType.HALF - log.info("Mixed-Precision Layer {} set to HALF STRICT data type".format(layer.name)) - - def create_engine(self, engine_path, precision, calib_input=None, calib_cache=None, calib_num_images=5000, - calib_batch_size=8): + log.info( + "Mixed-Precision Layer {} set to HALF STRICT data type".format( + layer.name + ) + ) + + def create_engine( + self, + engine_path, + precision, + calib_input=None, + calib_cache=None, + calib_num_images=5000, + calib_batch_size=8, + ): """ Build the TensorRT engine and serialize it to disk. :param engine_path: The path where to serialize the engine to. 
@@ -251,8 +292,15 @@ def create_engine(self, engine_path, precision, calib_input=None, calib_cache=No calib_shape = [calib_batch_size] + list(inputs[0].shape[1:]) calib_dtype = trt.nptype(inputs[0].dtype) self.config.int8_calibrator.set_image_batcher( - ImageBatcher(calib_input, calib_shape, calib_dtype, max_num_images=calib_num_images, - exact_batches=True, shuffle_files=True)) + ImageBatcher( + calib_input, + calib_shape, + calib_dtype, + max_num_images=calib_num_images, + exact_batches=True, + shuffle_files=True, + ) + ) engine_bytes = self.builder.build_serialized_network(self.network, self.config) if engine_bytes is None: @@ -272,41 +320,88 @@ def main(args): builder.create_network(args.onnx, args.batch_size, args.dynamic_batch_size) if args.precision == "mixed": builder.set_mixed_precision() - builder.create_engine(args.engine, args.precision, args.calib_input, args.calib_cache, args.calib_num_images, - args.calib_batch_size) + builder.create_engine( + args.engine, + args.precision, + args.calib_input, + args.calib_cache, + args.calib_num_images, + args.calib_batch_size, + ) if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("-o", "--onnx", required=True, - help="The input ONNX model file to load") - parser.add_argument("-e", "--engine", required=True, - help="The output path for the TRT engine") - parser.add_argument("-b", "--batch_size", default=1, type=int, - help="The static batch size to build the engine with, default: 1") - parser.add_argument("-d", "--dynamic_batch_size", default=None, - help="Enable dynamic batch size by providing a comma-separated MIN,OPT,MAX batch size, " - "if this option is set, --batch_size is ignored, example: -d 1,16,32, " - "default: None, build static engine") - parser.add_argument("-p", "--precision", default="fp16", choices=["fp32", "fp16", "int8", "mixed"], - help="The precision mode to build in, either fp32/fp16/int8/mixed, default: fp16") - parser.add_argument("-v", "--verbose", 
action="store_true", - help="Enable more verbose log output") - parser.add_argument("-w", "--workspace", default=8, type=int, - help="The max memory workspace size to allow in Gb, default: 8") - parser.add_argument("--calib_input", - help="The directory holding images to use for calibration") - parser.add_argument("--calib_cache", default=None, - help="The file path for INT8 calibration cache to use, default: ./calibration.cache") - parser.add_argument("--calib_num_images", default=5000, type=int, - help="The maximum number of images to use for calibration, default: 5000") - parser.add_argument("--calib_batch_size", default=8, type=int, - help="The batch size for the calibration process, default: 8") - parser.add_argument("--timing_cache", default="./timing.cache", - help="The file path for timing cache, default: ./timing.cache") + parser.add_argument( + "-o", "--onnx", required=True, help="The input ONNX model file to load" + ) + parser.add_argument( + "-e", "--engine", required=True, help="The output path for the TRT engine" + ) + parser.add_argument( + "-b", + "--batch_size", + default=1, + type=int, + help="The static batch size to build the engine with, default: 1", + ) + parser.add_argument( + "-d", + "--dynamic_batch_size", + default=None, + help="Enable dynamic batch size by providing a comma-separated MIN,OPT,MAX batch size, " + "if this option is set, --batch_size is ignored, example: -d 1,16,32, " + "default: None, build static engine", + ) + parser.add_argument( + "-p", + "--precision", + default="fp16", + choices=["fp32", "fp16", "int8", "mixed"], + help="The precision mode to build in, either fp32/fp16/int8/mixed, default: fp16", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="Enable more verbose log output" + ) + parser.add_argument( + "-w", + "--workspace", + default=8, + type=int, + help="The max memory workspace size to allow in Gb, default: 8", + ) + parser.add_argument( + "--calib_input", help="The directory holding 
images to use for calibration" + ) + parser.add_argument( + "--calib_cache", + default=None, + help="The file path for INT8 calibration cache to use, default: ./calibration.cache", + ) + parser.add_argument( + "--calib_num_images", + default=5000, + type=int, + help="The maximum number of images to use for calibration, default: 5000", + ) + parser.add_argument( + "--calib_batch_size", + default=8, + type=int, + help="The batch size for the calibration process, default: 8", + ) + parser.add_argument( + "--timing_cache", + default="./timing.cache", + help="The file path for timing cache, default: ./timing.cache", + ) args = parser.parse_args() - if args.precision in ["int8", "mixed"] and not (args.calib_input or os.path.exists(args.calib_cache)): + if args.precision in ["int8", "mixed"] and not ( + args.calib_input or os.path.exists(args.calib_cache) + ): parser.print_help() - log.error("When building in int8 or mixed precision, --calib_input or an existing --calib_cache file is required") + log.error( + "When building in int8 or mixed precision, --calib_input or an existing --calib_cache file is required" + ) sys.exit(1) main(args) diff --git a/samples/python/efficientdet/compare_tf.py b/samples/python/efficientdet/compare_tf.py index 54c356cd..4e4b91fc 100644 --- a/samples/python/efficientdet/compare_tf.py +++ b/samples/python/efficientdet/compare_tf.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -35,7 +35,12 @@ def run(batcher, inferer, framework, nms_threshold=None): for batch, images, scales in batcher.get_batch(): res_detections += inferer.process(batch, scales, nms_threshold) res_images += images - print("Processing {} / {} images ({})".format(batcher.image_index, batcher.num_images, framework), end="\r") + print( + "Processing {} / {} images ({})".format( + batcher.image_index, batcher.num_images, framework + ), + end="\r", + ) print() return res_images, res_detections @@ -62,7 +67,15 @@ def parse_annotations(annotations_path): return annotations -def compare_images(tf_images, tf_detections, trt_images, trt_detections, output_dir, annotations_path, labels_path): +def compare_images( + tf_images, + tf_detections, + trt_images, + trt_detections, + output_dir, + annotations_path, + labels_path, +): labels = [] if labels_path and os.path.exists(labels_path): with open(labels_path) as f: @@ -72,7 +85,9 @@ def compare_images(tf_images, tf_detections, trt_images, trt_detections, output_ annotations = parse_annotations(annotations_path) count = 1 - for tf_img, tf_det, trt_img, trt_det in zip(tf_images, tf_detections, trt_images, trt_detections): + for tf_img, tf_det, trt_img, trt_det in zip( + tf_images, tf_detections, trt_images, trt_detections + ): vis = [] names = [] colors = [] @@ -90,18 +105,27 @@ def compare_images(tf_images, tf_detections, trt_images, trt_detections, output_ if img_id.isnumeric(): img_id = int(img_id) if img_id in annotations.keys(): - vis.append(visualize_detections(trt_img, None, annotations[img_id], labels)) + vis.append( + visualize_detections(trt_img, None, annotations[img_id], labels) + ) names.append("Ground Truth") colors.append("RoyalBlue") else: - print("Image {} does not have a COCO annotation, skipping ground truth visualization".format(trt_img)) + print( + "Image {} does not have a COCO annotation, skipping ground 
truth visualization".format( + trt_img + ) + ) basename = os.path.splitext(os.path.basename(tf_img))[0] output_path = os.path.join(output_dir, "{}.compare.png".format(basename)) os.makedirs(output_dir, exist_ok=True) concat_visualizations(vis, names, colors, output_path) - print("Processing {} / {} images (Visualization)".format(count, len(tf_images)), end="\r") + print( + "Processing {} / {} images (Visualization)".format(count, len(tf_images)), + end="\r", + ) count += 1 print() @@ -110,32 +134,80 @@ def main(args): tf_infer = TensorFlowInfer(args.saved_model) trt_infer = TensorRTInfer(args.engine) - trt_batcher = ImageBatcher(args.input, *trt_infer.input_spec(), max_num_images=args.num_images) - tf_infer.override_input_shape(0, [1, trt_batcher.height, trt_batcher.width, 3]) # Same size input in TF as TRT - tf_batcher = ImageBatcher(args.input, *tf_infer.input_spec(), max_num_images=args.num_images) - - tf_images, tf_detections = run(tf_batcher, tf_infer, "TensorFlow", args.nms_threshold) - trt_images, trt_detections = run(trt_batcher, trt_infer, "TensorRT", args.nms_threshold) - - compare_images(tf_images, tf_detections, trt_images, trt_detections, args.output, args.annotations, args.labels) + trt_batcher = ImageBatcher( + args.input, *trt_infer.input_spec(), max_num_images=args.num_images + ) + tf_infer.override_input_shape( + 0, [1, trt_batcher.height, trt_batcher.width, 3] + ) # Same size input in TF as TRT + tf_batcher = ImageBatcher( + args.input, *tf_infer.input_spec(), max_num_images=args.num_images + ) + + tf_images, tf_detections = run( + tf_batcher, tf_infer, "TensorFlow", args.nms_threshold + ) + trt_images, trt_detections = run( + trt_batcher, trt_infer, "TensorRT", args.nms_threshold + ) + + compare_images( + tf_images, + tf_detections, + trt_images, + trt_detections, + args.output, + args.annotations, + args.labels, + ) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-e", "--engine", help="The TensorRT engine to 
infer with") - parser.add_argument("-m", "--saved_model", help="The TensorFlow saved model path to validate against") - parser.add_argument("-i", "--input", - help="The input to infer, either a single image path, or a directory of images") - parser.add_argument("-o", "--output", default=None, help="Directory where to save the visualization results") - parser.add_argument("-l", "--labels", default="./labels_coco.txt", - help="File to use for reading the class labels from, default: ./labels_coco.txt") - parser.add_argument("-a", "--annotations", default=None, - help="Set the path to the 'instances_val2017.json' file to use for COCO annotations, in which " - "case --input should point to the COCO val2017 dataset, default: not used") - parser.add_argument("-n", "--num_images", default=100, type=int, - help="The maximum number of images to visualize, default: 100") - parser.add_argument("-t", "--nms_threshold", type=float, help="Override the score threshold for the NMS operation, " - "if higher than the threshold in the model/engine.") + parser.add_argument( + "-m", + "--saved_model", + help="The TensorFlow saved model path to validate against", + ) + parser.add_argument( + "-i", + "--input", + help="The input to infer, either a single image path, or a directory of images", + ) + parser.add_argument( + "-o", + "--output", + default=None, + help="Directory where to save the visualization results", + ) + parser.add_argument( + "-l", + "--labels", + default="./labels_coco.txt", + help="File to use for reading the class labels from, default: ./labels_coco.txt", + ) + parser.add_argument( + "-a", + "--annotations", + default=None, + help="Set the path to the 'instances_val2017.json' file to use for COCO annotations, in which " + "case --input should point to the COCO val2017 dataset, default: not used", + ) + parser.add_argument( + "-n", + "--num_images", + default=100, + type=int, + help="The maximum number of images to visualize, default: 100", + ) + parser.add_argument( + 
"-t", + "--nms_threshold", + type=float, + help="Override the score threshold for the NMS operation, " + "if higher than the threshold in the model/engine.", + ) args = parser.parse_args() if not all([args.engine, args.saved_model, args.input, args.output]): parser.print_help() diff --git a/samples/python/efficientdet/create_onnx.py b/samples/python/efficientdet/create_onnx.py index 17fee5f6..ffe83094 100644 --- a/samples/python/efficientdet/create_onnx.py +++ b/samples/python/efficientdet/create_onnx.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -52,8 +52,12 @@ def __init__(self, saved_model_path): with tf.Graph().as_default() as tf_graph: tf.import_graph_def(graph_def, name="") with tf_loader.tf_session(graph=tf_graph): - onnx_graph = tfonnx.process_tf_graph(tf_graph, input_names=inputs, output_names=outputs, opset=11) - onnx_model = optimizer.optimize_graph(onnx_graph).make_model("Converted from {}".format(saved_model_path)) + onnx_graph = tfonnx.process_tf_graph( + tf_graph, input_names=inputs, output_names=outputs, opset=11 + ) + onnx_model = optimizer.optimize_graph(onnx_graph).make_model( + "Converted from {}".format(saved_model_path) + ) self.graph = gs.import_onnx(onnx_model) assert self.graph log.info("TF2ONNX graph created successfully") @@ -65,7 +69,16 @@ def __init__(self, saved_model_path): self.api = None if len([node for node in self.graph.nodes if "class_net/" in node.name]) > 0: self.api = "AutoML" - elif len([node for node in self.graph.nodes if "/WeightSharedConvolutionalClassHead/" in node.name]) > 0: + elif ( + len( + [ + node + for node in self.graph.nodes + if "/WeightSharedConvolutionalClassHead/" in node.name + ] + ) + > 0 + ): self.api = "TFOD" 
assert self.api log.info("Graph was detected as {}".format(self.api)) @@ -87,7 +100,9 @@ def sanitize(self): model = shape_inference.infer_shapes(model) self.graph = gs.import_onnx(model) except Exception as e: - log.info("Shape inference could not be performed at this time:\n{}".format(e)) + log.info( + "Shape inference could not be performed at this time:\n{}".format(e) + ) try: self.graph.fold_constants(fold_shapes=True) except TypeError as e: @@ -130,41 +145,63 @@ def update_preprocessor(self, input_format, input_size, preprocessor="imagenet") assert input_size[i] >= 1 assert input_format in ["NCHW", "NHWC"] if input_format == "NCHW": - self.graph.inputs[0].shape = ['N', 3, input_size[0], input_size[1]] + self.graph.inputs[0].shape = ["N", 3, input_size[0], input_size[1]] if input_format == "NHWC": - self.graph.inputs[0].shape = ['N', input_size[0], input_size[1], 3] + self.graph.inputs[0].shape = ["N", input_size[0], input_size[1], 3] self.graph.inputs[0].dtype = np.float32 self.graph.inputs[0].name = "input" - log.info("ONNX graph input shape: {} [{} format]".format(self.graph.inputs[0].shape, input_format)) + log.info( + "ONNX graph input shape: {} [{} format]".format( + self.graph.inputs[0].shape, input_format + ) + ) self.sanitize() # Find the initial nodes of the graph, whatever the input is first connected to, and disconnect them - for node in [node for node in self.graph.nodes if self.graph.inputs[0] in node.inputs]: + for node in [ + node for node in self.graph.nodes if self.graph.inputs[0] in node.inputs + ]: node.inputs.clear() # Convert to NCHW format if needed input_tensor = self.graph.inputs[0] if input_format == "NHWC": - input_tensor = self.graph.transpose("preprocessor/transpose", input_tensor, [0, 3, 1, 2]) + input_tensor = self.graph.transpose( + "preprocessor/transpose", input_tensor, [0, 3, 1, 2] + ) assert preprocessor in ["imagenet", "scale_range"] preprocessed_tensor = None if preprocessor == "imagenet": # RGB Normalizers. 
The per-channel values are given with shape [1, 3, 1, 1] for proper NCHW shape broadcasting scale_val = 1 / np.asarray([255], dtype=np.float32) - mean_val = -1 * np.expand_dims(np.asarray([0.485, 0.456, 0.406], dtype=np.float32), axis=(0, 2, 3)) - stddev_val = 1 / np.expand_dims(np.asarray([0.229, 0.224, 0.225], dtype=np.float32), axis=(0, 2, 3)) + mean_val = -1 * np.expand_dims( + np.asarray([0.485, 0.456, 0.406], dtype=np.float32), axis=(0, 2, 3) + ) + stddev_val = 1 / np.expand_dims( + np.asarray([0.229, 0.224, 0.225], dtype=np.float32), axis=(0, 2, 3) + ) # y = (x * scale + mean) * stddev --> y = x * scale * stddev + mean * stddev - scale_out = self.graph.elt_const("Mul", "preprocessor/scale", input_tensor, scale_val * stddev_val) - mean_out = self.graph.elt_const("Add", "preprocessor/mean", scale_out, mean_val * stddev_val) + scale_out = self.graph.elt_const( + "Mul", "preprocessor/scale", input_tensor, scale_val * stddev_val + ) + mean_out = self.graph.elt_const( + "Add", "preprocessor/mean", scale_out, mean_val * stddev_val + ) preprocessed_tensor = mean_out[0] if preprocessor == "scale_range": # RGB Normalizers. 
The per-channel values are given with shape [1, 3, 1, 1] for proper NCHW shape broadcasting scale_val = 2 / np.asarray([255], dtype=np.float32) - offset_val = np.expand_dims(np.asarray([-1, -1, -1], dtype=np.float32), axis=(0, 2, 3)) + offset_val = np.expand_dims( + np.asarray([-1, -1, -1], dtype=np.float32), axis=(0, 2, 3) + ) # y = (x * scale + mean) * stddev --> y = x * scale * stddev + mean * stddev - scale_out = self.graph.elt_const("Mul", "preprocessor/scale", input_tensor, scale_val) - range_out = self.graph.elt_const("Add", "preprocessor/range", scale_out, offset_val) + scale_out = self.graph.elt_const( + "Mul", "preprocessor/scale", input_tensor, scale_val + ) + range_out = self.graph.elt_const( + "Add", "preprocessor/range", scale_out, offset_val + ) preprocessed_tensor = range_out[0] # Find the first stem conv node of the graph, and connect the normalizer directly to it @@ -173,7 +210,11 @@ def update_preprocessor(self, input_format, input_size, preprocessor="imagenet") stem_name = "/stem/" if self.api == "TFOD": stem_name = "/stem_conv2d/" - stem = [node for node in self.graph.nodes if node.op == "Conv" and stem_name in node.name][0] + stem = [ + node + for node in self.graph.nodes + if node.op == "Conv" and stem_name in node.name + ][0] log.info("Found {} node '{}' as stem entry".format(stem.op, stem.name)) stem.inputs[0] = preprocessed_tensor @@ -184,7 +225,10 @@ def update_shapes(self): # Output-Head reshapes use [1, -1, C], corrected reshape value should be [-1, V, C] for node in [node for node in self.graph.nodes if node.op == "Reshape"]: shape_in = node.inputs[0].shape - if shape_in is None or len(shape_in) not in [4,5]: # TFOD graphs have 5-dim inputs on this Reshape + if shape_in is None or len(shape_in) not in [ + 4, + 5, + ]: # TFOD graphs have 5-dim inputs on this Reshape continue if type(node.inputs[1]) != gs.Constant: continue @@ -195,15 +239,29 @@ def update_shapes(self): if len(shape_in) == 5: volume *= shape_in[4] shape_corrected = 
np.asarray([-1, volume, shape_out[2]], dtype=np.int64) - node.inputs[1] = gs.Constant("{}_shape".format(node.name), values=shape_corrected) - log.info("Updating Output-Head Reshape node {} to {}".format(node.name, node.inputs[1].values)) + node.inputs[1] = gs.Constant( + "{}_shape".format(node.name), values=shape_corrected + ) + log.info( + "Updating Output-Head Reshape node {} to {}".format( + node.name, node.inputs[1].values + ) + ) # Other Reshapes only need to change the first dim to -1, as long as there are no -1's already for node in [node for node in self.graph.nodes if node.op == "Reshape"]: - if type(node.inputs[1]) != gs.Constant or node.inputs[1].values[0] != 1 or -1 in node.inputs[1].values: + if ( + type(node.inputs[1]) != gs.Constant + or node.inputs[1].values[0] != 1 + or -1 in node.inputs[1].values + ): continue node.inputs[1].values[0] = -1 - log.info("Updating Reshape node {} to {}".format(node.name, node.inputs[1].values)) + log.info( + "Updating Reshape node {} to {}".format( + node.name, node.inputs[1].values + ) + ) # Resize nodes try to calculate the output shape dynamically, it's more optimal to pre-compute the shape if self.api == "AutoML": @@ -223,13 +281,18 @@ def update_shapes(self): concat = node.i(3) if concat.op != "Concat": continue - if type(concat.inputs[1]) != gs.Constant or len(concat.inputs[1].values) != 2: + if ( + type(concat.inputs[1]) != gs.Constant + or len(concat.inputs[1].values) != 2 + ): continue scale_h = concat.inputs[1].values[0] / node.inputs[0].shape[2] scale_w = concat.inputs[1].values[1] / node.inputs[0].shape[3] scales = np.asarray([1, 1, scale_h, scale_w], dtype=np.float32) del node.inputs[3] - node.inputs[2] = gs.Constant(name="{}_scales".format(node.name), values=scales) + node.inputs[2] = gs.Constant( + name="{}_scales".format(node.name), values=scales + ) log.info("Updating Resize node {} to {}".format(node.name, scales)) self.sanitize() @@ -241,7 +304,9 @@ def update_network(self): """ if self.api == 
"TFOD": - for reduce in [node for node in self.graph.nodes if node.op == "ReduceMean"]: + for reduce in [ + node for node in self.graph.nodes if node.op == "ReduceMean" + ]: # TFOD models have their ReduceMean nodes applied with some redundant transposes that can be # optimized away for better performance # Make sure the correct subgraph is being replaced, basically search for this: @@ -249,19 +314,30 @@ def update_network(self): # And change to this: # X > ReduceMean (2,3) > Conv > Y transpose = reduce.i() - if transpose.op != "Transpose" or transpose.attrs['perm'] != [0, 2, 3, 1]: + if transpose.op != "Transpose" or transpose.attrs["perm"] != [ + 0, + 2, + 3, + 1, + ]: continue - if len(reduce.attrs['axes']) != 2 or reduce.attrs['axes'] != [1, 2]: + if len(reduce.attrs["axes"]) != 2 or reduce.attrs["axes"] != [1, 2]: continue reshape1 = reduce.o() if reshape1.op != "Reshape" or len(reshape1.inputs[1].values) != 4: continue - if reshape1.inputs[1].values[1] != 1 or reshape1.inputs[1].values[2] != 1: + if ( + reshape1.inputs[1].values[1] != 1 + or reshape1.inputs[1].values[2] != 1 + ): continue reshape2 = reshape1.o() if reshape2.op != "Reshape" or len(reshape2.inputs[1].values) != 4: continue - if reshape2.inputs[1].values[2] != 1 or reshape2.inputs[1].values[3] != 1: + if ( + reshape2.inputs[1].values[2] != 1 + or reshape2.inputs[1].values[3] != 1 + ): continue conv = reshape2.o() if conv.op != "Conv": @@ -269,12 +345,21 @@ def update_network(self): # If all the checks above pass, then this node sequence can be optimized by just the ReduceMean itself # operating on a different set of axes input_tensor = transpose.inputs[0] # Input tensor to the Transpose - reduce.inputs[0] = input_tensor # Forward the Transpose input to the ReduceMean node + reduce.inputs[0] = ( + input_tensor # Forward the Transpose input to the ReduceMean node + ) output_tensor = reduce.outputs[0] # Output tensor of the ReduceMean - conv.inputs[0] = output_tensor # Forward the ReduceMean output 
to the Conv node - reduce.attrs["axes"] = [2, 3] # Update the axes that ReduceMean operates on + conv.inputs[0] = ( + output_tensor # Forward the ReduceMean output to the Conv node + ) + reduce.attrs["axes"] = [ + 2, + 3, + ] # Update the axes that ReduceMean operates on reduce.attrs["keepdims"] = 1 # Keep the reduced dimensions - log.info("Optimized subgraph around ReduceMean node '{}'".format(reduce.name)) + log.info( + "Optimized subgraph around ReduceMean node '{}'".format(reduce.name) + ) def update_nms(self, threshold=None, detections=None): """ @@ -290,10 +375,18 @@ def find_head_concat(name_scope): # and the concatenated Box Net node has the shape [batch_size, num_anchors, 4]. # These concatenation nodes can be be found by searching for all Concat's and checking if the node two # steps above in the graph has a name that begins with either "box_net/..." or "class_net/...". - for node in [node for node in self.graph.nodes if node.op == "Transpose" and name_scope in node.name]: + for node in [ + node + for node in self.graph.nodes + if node.op == "Transpose" and name_scope in node.name + ]: concat = self.graph.find_descendant_by_op(node, "Concat") assert concat and len(concat.inputs) == 5 - log.info("Found {} node '{}' as the tip of {}".format(concat.op, concat.name, name_scope)) + log.info( + "Found {} node '{}' as the tip of {}".format( + concat.op, concat.name, name_scope + ) + ) return concat def extract_anchors_tensor(split): @@ -319,7 +412,9 @@ def get_anchor_np(output_idx, op): anchors_x = get_anchor_np(1, "Add") anchors_h = get_anchor_np(2, "Mul") anchors_w = get_anchor_np(3, "Mul") - anchors = np.concatenate([anchors_y, anchors_x, anchors_h, anchors_w], axis=2) + anchors = np.concatenate( + [anchors_y, anchors_x, anchors_h, anchors_w], axis=2 + ) return gs.Constant(name="nms/anchors:0", values=anchors) self.sanitize() @@ -328,7 +423,10 @@ def get_anchor_np(output_idx, op): if self.api == "AutoML": head_names = ["class_net/", "box_net/"] if self.api == 
"TFOD": - head_names = ["/WeightSharedConvolutionalClassHead/", "/WeightSharedConvolutionalBoxHead/"] + head_names = [ + "/WeightSharedConvolutionalClassHead/", + "/WeightSharedConvolutionalBoxHead/", + ] # There are five nodes at the bottom of the graph that provide important connection points: @@ -353,9 +451,13 @@ def get_anchor_np(output_idx, op): nms_node = self.graph.find_node_by_op("NonMaxSuppression") # Extract NMS Configuration - num_detections = int(nms_node.inputs[2].values) if detections is None else detections + num_detections = ( + int(nms_node.inputs[2].values) if detections is None else detections + ) iou_threshold = float(nms_node.inputs[3].values) - score_threshold = float(nms_node.inputs[4].values) if threshold is None else threshold + score_threshold = ( + float(nms_node.inputs[4].values) if threshold is None else threshold + ) num_classes = class_net.i().inputs[1].values[-1] normalized = True if self.api == "TFOD" else False @@ -380,27 +482,41 @@ def get_anchor_np(output_idx, op): nms_inputs = [box_net_tensor, class_net_tensor, anchors_tensor] nms_op = "EfficientNMS_TRT" nms_attrs = { - 'plugin_version': "1", - 'background_class': -1, - 'max_output_boxes': num_detections, - 'score_threshold': max(0.01, score_threshold), # Keep threshold to at least 0.01 for better efficiency - 'iou_threshold': iou_threshold, - 'score_activation': True, - 'class_agnostic': False, - 'box_coding': 1, + "plugin_version": "1", + "background_class": -1, + "max_output_boxes": num_detections, + "score_threshold": max( + 0.01, score_threshold + ), # Keep threshold to at least 0.01 for better efficiency + "iou_threshold": iou_threshold, + "score_activation": True, + "class_agnostic": False, + "box_coding": 1, } nms_output_classes_dtype = np.int32 # NMS Outputs - nms_output_num_detections = gs.Variable(name="num_detections", dtype=np.int32, shape=['N', 1]) - nms_output_boxes = gs.Variable(name="detection_boxes", dtype=np.float32, - shape=['N', num_detections, 4]) - 
nms_output_scores = gs.Variable(name="detection_scores", dtype=np.float32, - shape=['N', num_detections]) - nms_output_classes = gs.Variable(name="detection_classes", dtype=nms_output_classes_dtype, - shape=['N', num_detections]) + nms_output_num_detections = gs.Variable( + name="num_detections", dtype=np.int32, shape=["N", 1] + ) + nms_output_boxes = gs.Variable( + name="detection_boxes", dtype=np.float32, shape=["N", num_detections, 4] + ) + nms_output_scores = gs.Variable( + name="detection_scores", dtype=np.float32, shape=["N", num_detections] + ) + nms_output_classes = gs.Variable( + name="detection_classes", + dtype=nms_output_classes_dtype, + shape=["N", num_detections], + ) - nms_outputs = [nms_output_num_detections, nms_output_boxes, nms_output_scores, nms_output_classes] + nms_outputs = [ + nms_output_num_detections, + nms_output_boxes, + nms_output_scores, + nms_output_classes, + ] # Create the NMS Plugin node with the selected inputs. The outputs of the node will also become the final # outputs of the graph. 
@@ -409,8 +525,11 @@ def get_anchor_np(output_idx, op): name="nms/non_maximum_suppression", inputs=nms_inputs, outputs=nms_outputs, - attrs=nms_attrs) - log.info("Created NMS plugin '{}' with attributes: {}".format(nms_op, nms_attrs)) + attrs=nms_attrs, + ) + log.info( + "Created NMS plugin '{}' with attributes: {}".format(nms_op, nms_attrs) + ) self.graph.outputs = nms_outputs @@ -430,25 +549,54 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("-m", "--saved_model", required=True, - help="The TensorFlow saved model directory to load") - parser.add_argument("-o", "--onnx", required=True, - help="The output ONNX model file to write") - parser.add_argument("-f", "--input_format", default="NHWC", choices=["NHWC", "NCHW"], - help="Set the input data format of the graph, either NCHW or NHWC, default: NHWC") - parser.add_argument("-i", "--input_size", default="512,512", - help="Set the input shape of the graph, as a comma-separated dimensions in H,W format, " - "default: 512,512") - parser.add_argument("-p", "--preprocessor", default="imagenet", choices=["imagenet", "scale_range"], - help="Set the preprocessor to apply on the graph, either 'imagenet' for standard mean " - "subtraction and stdev normalization, or 'scale_range' for uniform [-1,+1] " - "normalization as is used in the AdvProp models, default: imagenet") - parser.add_argument("-t", "--nms_threshold", type=float, - help="Override the NMS score threshold, default: use the original value in the model") - parser.add_argument("-d", "--nms_detections", type=int, - help="Override the NMS max detections, default: use the original value in the model") - parser.add_argument("--tf2onnx", - help="The path where to save the intermediate ONNX graph generated by tf2onnx, useful" - "for graph debugging purposes, default: not saved") + parser.add_argument( + "-m", + "--saved_model", + required=True, + help="The TensorFlow saved model directory to load", + ) + 
parser.add_argument( + "-o", "--onnx", required=True, help="The output ONNX model file to write" + ) + parser.add_argument( + "-f", + "--input_format", + default="NHWC", + choices=["NHWC", "NCHW"], + help="Set the input data format of the graph, either NCHW or NHWC, default: NHWC", + ) + parser.add_argument( + "-i", + "--input_size", + default="512,512", + help="Set the input shape of the graph, as a comma-separated dimensions in H,W format, " + "default: 512,512", + ) + parser.add_argument( + "-p", + "--preprocessor", + default="imagenet", + choices=["imagenet", "scale_range"], + help="Set the preprocessor to apply on the graph, either 'imagenet' for standard mean " + "subtraction and stdev normalization, or 'scale_range' for uniform [-1,+1] " + "normalization as is used in the AdvProp models, default: imagenet", + ) + parser.add_argument( + "-t", + "--nms_threshold", + type=float, + help="Override the NMS score threshold, default: use the original value in the model", + ) + parser.add_argument( + "-d", + "--nms_detections", + type=int, + help="Override the NMS max detections, default: use the original value in the model", + ) + parser.add_argument( + "--tf2onnx", + help="The path where to save the intermediate ONNX graph generated by tf2onnx, useful" + "for graph debugging purposes, default: not saved", + ) args = parser.parse_args() main(args) diff --git a/samples/python/efficientdet/eval_coco.py b/samples/python/efficientdet/eval_coco.py index 966f49be..d6796ac0 100644 --- a/samples/python/efficientdet/eval_coco.py +++ b/samples/python/efficientdet/eval_coco.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,15 +31,24 @@ def main(args): try: import coco_metric except ImportError: - print("Could not import the 'coco_metric' module from AutoML. Searching in: {}".format(automl_path)) - print("Please clone the repository https://github.com/google/automl and provide its path with --automl_path.") + print( + "Could not import the 'coco_metric' module from AutoML. Searching in: {}".format( + automl_path + ) + ) + print( + "Please clone the repository https://github.com/google/automl and provide its path with --automl_path." + ) sys.exit(1) trt_infer = TensorRTInfer(args.engine) batcher = ImageBatcher(args.input, *trt_infer.input_spec()) evaluator = coco_metric.EvaluationMetric(filename=args.annotations) for batch, images, scales in batcher.get_batch(): - print("Processing Image {} / {}".format(batcher.image_index, batcher.num_images), end="\r") + print( + "Processing Image {} / {}".format(batcher.image_index, batcher.num_images), + end="\r", + ) detections = trt_infer.process(batch, scales, args.nms_threshold) coco_det = np.zeros((len(images), max([len(d) for d in detections]), 7)) coco_det[:, :, -1] = -1 @@ -54,7 +63,8 @@ def main(args): det["xmax"] - det["xmin"], det["ymax"] - det["ymin"], det["score"], - det["class"] + 1, # The COCO evaluator expects class 0 to be background, so offset by 1 + det["class"] + + 1, # The COCO evaluator expects class 0 to be background, so offset by 1 ] evaluator.update_state(None, coco_det) print() @@ -64,14 +74,30 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with") - parser.add_argument("-i", "--input", - help="The input to infer, either a single image path, or a directory of images") - parser.add_argument("-a", "--annotations", help="Set the path to the COCO 'instances_val2017.json' file") - parser.add_argument("-p", 
"--automl_path", default="./automl", - help="Set the path where to find the AutoML repository, from " - "https://github.com/google/automl. Default: ./automl") - parser.add_argument("-t", "--nms_threshold", type=float, help="Override the score threshold for the NMS operation, " - "if higher than the threshold in the engine.") + parser.add_argument( + "-i", + "--input", + help="The input to infer, either a single image path, or a directory of images", + ) + parser.add_argument( + "-a", + "--annotations", + help="Set the path to the COCO 'instances_val2017.json' file", + ) + parser.add_argument( + "-p", + "--automl_path", + default="./automl", + help="Set the path where to find the AutoML repository, from " + "https://github.com/google/automl. Default: ./automl", + ) + parser.add_argument( + "-t", + "--nms_threshold", + type=float, + help="Override the score threshold for the NMS operation, " + "if higher than the threshold in the engine.", + ) args = parser.parse_args() if not all([args.engine, args.input, args.annotations]): parser.print_help() diff --git a/samples/python/efficientdet/image_batcher.py b/samples/python/efficientdet/image_batcher.py index e519a5db..11b94c24 100644 --- a/samples/python/efficientdet/image_batcher.py +++ b/samples/python/efficientdet/image_batcher.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -28,7 +28,16 @@ class ImageBatcher: Creates batches of pre-processed images. 
""" - def __init__(self, input, shape, dtype, max_num_images=None, exact_batches=False, preprocessor="EfficientDet", shuffle_files=False): + def __init__( + self, + input, + shape, + dtype, + max_num_images=None, + exact_batches=False, + preprocessor="EfficientDet", + shuffle_files=False, + ): """ :param input: The input directory to read images from. :param shape: The tensor shape of the batch to prepare, either in NCHW or NHWC format. @@ -47,10 +56,16 @@ def __init__(self, input, shape, dtype, max_num_images=None, exact_batches=False extensions = [".jpg", ".jpeg", ".png", ".bmp"] def is_image(path): - return os.path.isfile(path) and os.path.splitext(path)[1].lower() in extensions + return ( + os.path.isfile(path) and os.path.splitext(path)[1].lower() in extensions + ) if os.path.isdir(input): - self.images = [os.path.join(input, f) for f in os.listdir(input) if is_image(os.path.join(input, f))] + self.images = [ + os.path.join(input, f) + for f in os.listdir(input) + if is_image(os.path.join(input, f)) + ] self.images.sort() if shuffle_files: random.seed(47) @@ -129,7 +144,9 @@ def resize_pad(image, pad_color=(0, 0, 0)): width_scale = width / self.width height_scale = height / self.height scale = 1.0 / max(width_scale, height_scale) - image = image.resize((round(width * scale), round(height * scale)), resample=Image.BILINEAR) + image = image.resize( + (round(width * scale), round(height * scale)), resample=Image.BILINEAR + ) pad = Image.new("RGB", (self.width, self.height)) pad.paste(pad_color, [0, 0, self.width, self.height]) pad.paste(image) diff --git a/samples/python/efficientdet/infer.py b/samples/python/efficientdet/infer.py index 25bd28de..5308cf47 100644 --- a/samples/python/efficientdet/infer.py +++ b/samples/python/efficientdet/infer.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -62,7 +62,7 @@ def __init__(self, engine_path): shape = self.context.get_tensor_shape(name) if is_input and shape[0] < 0: assert self.engine.num_optimization_profiles > 0 - profile_shape = self.engine.get_profile_shape(0, name) + profile_shape = self.engine.get_tensor_profile_shape(name, 0) assert len(profile_shape) == 3 # min,opt,max # Set the *max* profile as binding shape self.context.set_input_shape(name, profile_shape[2]) @@ -87,9 +87,14 @@ def __init__(self, engine_path): self.inputs.append(binding) else: self.outputs.append(binding) - print("{} '{}' with shape {} and dtype {}".format( - "Input" if is_input else "Output", - binding['name'], binding['shape'], binding['dtype'])) + print( + "{} '{}' with shape {} and dtype {}".format( + "Input" if is_input else "Output", + binding["name"], + binding["shape"], + binding["dtype"], + ) + ) assert self.batch_size > 0 assert len(self.inputs) > 0 @@ -101,7 +106,7 @@ def input_spec(self): Get the specs for the input tensor of the network. Useful to prepare memory allocations. :return: Two items, the shape of the input tensor and its (numpy) datatype. """ - return self.inputs[0]['shape'], self.inputs[0]['dtype'] + return self.inputs[0]["shape"], self.inputs[0]["dtype"] def output_spec(self): """ @@ -110,7 +115,7 @@ def output_spec(self): """ specs = [] for o in self.outputs: - specs.append((o['shape'], o['dtype'])) + specs.append((o["shape"], o["dtype"])) return specs def infer(self, batch): @@ -120,11 +125,13 @@ def infer(self, batch): :return A list of outputs as numpy arrays. 
""" # Copy I/O and Execute - common.memcpy_host_to_device(self.inputs[0]['allocation'], batch) + common.memcpy_host_to_device(self.inputs[0]["allocation"], batch) self.context.execute_v2(self.allocations) for o in range(len(self.outputs)): - common.memcpy_device_to_host(self.outputs[o]['host_allocation'], self.outputs[o]['allocation']) - return [o['host_allocation'] for o in self.outputs] + common.memcpy_device_to_host( + self.outputs[o]["host_allocation"], self.outputs[o]["allocation"] + ) + return [o["host_allocation"] for o in self.outputs] def process(self, batch, scales=None, nms_threshold=None): """ @@ -143,11 +150,11 @@ def process(self, batch, scales=None, nms_threshold=None): scores = outputs[2] classes = outputs[3] detections = [] - normalized = (np.max(boxes) < 2.0) + normalized = np.max(boxes) < 2.0 for i in range(self.batch_size): detections.append([]) for n in range(int(nums[i])): - scale = self.inputs[0]['shape'][2] if normalized else 1.0 + scale = self.inputs[0]["shape"][2] if normalized else 1.0 if scales and i < len(scales): scale /= scales[i] if nms_threshold and scores[i][n] < nms_threshold: @@ -181,7 +188,12 @@ def main(args): print("Inferring data in {}".format(args.input)) batcher = ImageBatcher(args.input, *trt_infer.input_spec()) for batch, images, scales in batcher.get_batch(): - print("Processing Image {} / {}".format(batcher.image_index, batcher.num_images), end="\r") + print( + "Processing Image {} / {}".format( + batcher.image_index, batcher.num_images + ), + end="\r", + ) detections = trt_infer.process(batch, scales, args.nms_threshold) if args.output: for i in range(len(images)): @@ -192,9 +204,18 @@ def main(args): # Text Results output_results = "" for d in detections[i]: - line = [d['xmin'], d['ymin'], d['xmax'], d['ymax'], d['score'], d['class']] + line = [ + d["xmin"], + d["ymin"], + d["xmax"], + d["ymax"], + d["score"], + d["class"], + ] output_results += "\t".join([str(f) for f in line]) + "\n" - with 
open(os.path.join(output_dir, "{}.txt".format(basename)), "w") as f: + with open( + os.path.join(output_dir, "{}.txt".format(basename)), "w" + ) as f: f.write(output_results) else: print("No input provided, running in benchmark mode") @@ -210,10 +231,12 @@ def main(args): times.append(time.time() - start) print("Iteration {} / {}".format(i + 1, iterations), end="\r") print("Benchmark results include time for H2D and D2H memory copies") - print("Average Latency: {:.3f} ms".format( - 1000 * np.average(times))) - print("Average Throughput: {:.1f} ips".format( - trt_infer.batch_size / np.average(times))) + print("Average Latency: {:.3f} ms".format(1000 * np.average(times))) + print( + "Average Throughput: {:.1f} ips".format( + trt_infer.batch_size / np.average(times) + ) + ) print() print("Finished Processing") @@ -221,15 +244,33 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("-e", "--engine", default=None, required=True, - help="The serialized TensorRT engine") - parser.add_argument("-i", "--input", default=None, - help="Path to the image or directory to process") - parser.add_argument("-o", "--output", default=None, - help="Directory where to save the visualization results") - parser.add_argument("-l", "--labels", default="./labels_coco.txt", - help="File to use for reading the class labels from, default: ./labels_coco.txt") - parser.add_argument("-t", "--nms_threshold", type=float, - help="Override the score threshold for the NMS operation, if higher than the built-in threshold") + parser.add_argument( + "-e", + "--engine", + default=None, + required=True, + help="The serialized TensorRT engine", + ) + parser.add_argument( + "-i", "--input", default=None, help="Path to the image or directory to process" + ) + parser.add_argument( + "-o", + "--output", + default=None, + help="Directory where to save the visualization results", + ) + parser.add_argument( + "-l", + "--labels", + default="./labels_coco.txt", + 
help="File to use for reading the class labels from, default: ./labels_coco.txt", + ) + parser.add_argument( + "-t", + "--nms_threshold", + type=float, + help="Override the score threshold for the NMS operation, if higher than the built-in threshold", + ) args = parser.parse_args() main(args) diff --git a/samples/python/efficientdet/infer_tf.py b/samples/python/efficientdet/infer_tf.py index a02f87ee..a2ecbd93 100644 --- a/samples/python/efficientdet/infer_tf.py +++ b/samples/python/efficientdet/infer_tf.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,47 +30,51 @@ class TensorFlowInfer: """ def __init__(self, saved_model_path): - gpus = tf.config.experimental.list_physical_devices('GPU') + gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True) self.model = tf.saved_model.load(saved_model_path) - self.pred_fn = self.model.signatures['serving_default'] + self.pred_fn = self.model.signatures["serving_default"] # Setup I/O bindings self.batch_size = 1 self.inputs = [] fn_inputs = self.pred_fn.structured_input_signature[1] for i, input in enumerate(list(fn_inputs.values())): - self.inputs.append({ - 'index': i, - 'name': input.name, - 'dtype': np.dtype(input.dtype.as_numpy_dtype()), - 'shape': [1, 512, 512, 3], # This can be overridden later - }) + self.inputs.append( + { + "index": i, + "name": input.name, + "dtype": np.dtype(input.dtype.as_numpy_dtype()), + "shape": [1, 512, 512, 3], # This can be overridden later + } + ) self.outputs = [] fn_outputs = self.pred_fn.structured_outputs for i, output in enumerate(list(fn_outputs.values())): - self.outputs.append({ - 'index': i, - 'name': output.name, - 
'dtype': np.dtype(output.dtype.as_numpy_dtype()), - 'shape': output.shape.as_list(), - }) + self.outputs.append( + { + "index": i, + "name": output.name, + "dtype": np.dtype(output.dtype.as_numpy_dtype()), + "shape": output.shape.as_list(), + } + ) def override_input_shape(self, input, shape): - self.inputs[input]['shape'] = shape + self.inputs[input]["shape"] = shape self.batch_size = shape[0] def input_spec(self): - return self.inputs[0]['shape'], self.inputs[0]['dtype'] + return self.inputs[0]["shape"], self.inputs[0]["dtype"] def output_spec(self): - return self.outputs[0]['shape'], self.outputs[0]['dtype'] + return self.outputs[0]["shape"], self.outputs[0]["dtype"] def infer(self, batch): # Process I/O and execute the network - input = {self.inputs[0]['name']: tf.convert_to_tensor(batch)} + input = {self.inputs[0]["name"]: tf.convert_to_tensor(batch)} output = self.pred_fn(**input) return output @@ -84,38 +88,42 @@ def process(self, batch, scales=None, nms_threshold=None): classes = None if len(self.outputs) == 1: # Detected as AutoML Saved Model - assert len(self.outputs[0]['shape']) == 3 and self.outputs[0]['shape'][2] == 7 - results = output[self.outputs[0]['name']].numpy() + assert ( + len(self.outputs[0]["shape"]) == 3 and self.outputs[0]["shape"][2] == 7 + ) + results = output[self.outputs[0]["name"]].numpy() boxes = results[:, :, 1:5] scores = results[:, :, 5] classes = results[:, :, 6].astype(np.int32) elif len(self.outputs) >= 4: # Detected as TFOD Saved Model - assert output['num_detections'] - num = int(output['num_detections'].numpy().flatten()[0]) - boxes = output['detection_boxes'].numpy()[:, 0:num, :] - scores = output['detection_scores'].numpy()[:, 0:num] - classes = output['detection_classes'].numpy()[:, 0:num] + assert output["num_detections"] + num = int(output["num_detections"].numpy().flatten()[0]) + boxes = output["detection_boxes"].numpy()[:, 0:num, :] + scores = output["detection_scores"].numpy()[:, 0:num] + classes = 
output["detection_classes"].numpy()[:, 0:num] # Process the results detections = [[]] - normalized = (np.max(boxes) < 2.0) + normalized = np.max(boxes) < 2.0 for n in range(scores.shape[1]): if scores[0][n] == 0.0: break - scale = self.inputs[0]['shape'][2] if normalized else 1.0 + scale = self.inputs[0]["shape"][2] if normalized else 1.0 if scales: scale /= scales[0] if nms_threshold and scores[0][n] < nms_threshold: continue - detections[0].append({ - 'ymin': boxes[0][n][0] * scale, - 'xmin': boxes[0][n][1] * scale, - 'ymax': boxes[0][n][2] * scale, - 'xmax': boxes[0][n][3] * scale, - 'score': scores[0][n], - 'class': int(classes[0][n]) - 1, - }) + detections[0].append( + { + "ymin": boxes[0][n][0] * scale, + "xmin": boxes[0][n][1] * scale, + "ymax": boxes[0][n][2] * scale, + "xmax": boxes[0][n][3] * scale, + "score": scores[0][n], + "class": int(classes[0][n]) - 1, + } + ) return detections @@ -137,10 +145,10 @@ def main(args): times.append(time.time() - start) print("Iteration {} / {}".format(i + 1, iterations), end="\r") print("Benchmark results include TensorFlow host overhead") - print("Average Latency: {:.3f} ms".format( - 1000 * np.average(times))) - print("Average Throughput: {:.1f} ips".format( - tf_infer.batch_size / np.average(times))) + print("Average Latency: {:.3f} ms".format(1000 * np.average(times))) + print( + "Average Throughput: {:.1f} ips".format(tf_infer.batch_size / np.average(times)) + ) print() print("Finished Processing") @@ -148,11 +156,24 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("-m", "--saved_model", required=True, - help="The TensorFlow saved model path to validate against") - parser.add_argument("-i", "--input_size", default="512,512", - help="The input size to run the model with, in HEIGHT,WIDTH format") - parser.add_argument("-b", "--batch_size", default=1, type=int, - help="The batch size to run the model with") + parser.add_argument( + "-m", + "--saved_model", + 
required=True, + help="The TensorFlow saved model path to validate against", + ) + parser.add_argument( + "-i", + "--input_size", + default="512,512", + help="The input size to run the model with, in HEIGHT,WIDTH format", + ) + parser.add_argument( + "-b", + "--batch_size", + default=1, + type=int, + help="The batch size to run the model with", + ) args = parser.parse_args() main(args) diff --git a/samples/python/efficientdet/onnx_utils.py b/samples/python/efficientdet/onnx_utils.py index e55f7e11..a98c3a7c 100644 --- a/samples/python/efficientdet/onnx_utils.py +++ b/samples/python/efficientdet/onnx_utils.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -36,7 +36,9 @@ def elt_const(self, op, name, input, value): input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created {} node '{}': {}".format(op, name, value.squeeze())) const = gs.Constant(name="{}_value:0".format(name), values=value) - return self.layer(name=name, op=op, inputs=[input_tensor, const], outputs=[name + ":0"]) + return self.layer( + name=name, op=op, inputs=[input_tensor, const], outputs=[name + ":0"] + ) @gs.Graph.register() @@ -51,7 +53,13 @@ def unsqueeze(self, name, input, axes=[-1]): """ input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created Unsqueeze node '{}': {}".format(name, axes)) - return self.layer(name=name, op="Unsqueeze", inputs=[input_tensor], outputs=[name + ":0"], attrs={"axes": axes}) + return self.layer( + name=name, + op="Unsqueeze", + inputs=[input_tensor], + outputs=[name + ":0"], + attrs={"axes": axes}, + ) @gs.Graph.register() @@ -66,7 +74,13 @@ def transpose(self, name, input, perm): """ input_tensor = input if type(input) is 
gs.Variable else input[0] log.debug("Created Transpose node '{}': {}".format(name, perm)) - return self.layer(name=name, op="Transpose", inputs=[input_tensor], outputs=[name + ":0"], attrs={"perm": perm}) + return self.layer( + name=name, + op="Transpose", + inputs=[input_tensor], + outputs=[name + ":0"], + attrs={"perm": perm}, + ) @gs.Graph.register() @@ -80,7 +94,9 @@ def sigmoid(self, name, input): """ input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created Sigmoid node '{}'".format(name)) - return self.layer(name=name, op="Sigmoid", inputs=[input_tensor], outputs=[name + ":0"]) + return self.layer( + name=name, op="Sigmoid", inputs=[input_tensor], outputs=[name + ":0"] + ) @gs.Graph.register() @@ -98,7 +114,9 @@ def plugin(self, op, name, inputs, outputs, attrs): """ input_tensors = inputs if type(inputs) is list else [inputs] log.debug("Created TRT Plugin node '{}': {}".format(name, attrs)) - return self.layer(op=op, name=name, inputs=input_tensors, outputs=outputs, attrs=attrs) + return self.layer( + op=op, name=name, inputs=input_tensors, outputs=outputs, attrs=attrs + ) @gs.Graph.register() diff --git a/samples/python/efficientdet/visualize.py b/samples/python/efficientdet/visualize.py index 4366f9e0..3fb982ef 100644 --- a/samples/python/efficientdet/visualize.py +++ b/samples/python/efficientdet/visualize.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -182,9 +182,18 @@ def visualize_detections(image_path, output_path, detections, labels=[]): text_left = d["xmin"] margin = np.ceil(0.05 * text_height) draw.rectangle( - [(text_left, text_bottom - text_height - 2 * margin), (text_left + text_width, text_bottom)], fill=color + [ + (text_left, text_bottom - text_height - 2 * margin), + (text_left + text_width, text_bottom), + ], + fill=color, + ) + draw.text( + (text_left + margin, text_bottom - text_height - margin), + text, + fill="black", + font=font, ) - draw.text((text_left + margin, text_bottom - text_height - margin), text, fill="black", font=font) if output_path is None: return image image.save(output_path) @@ -195,7 +204,12 @@ def draw_text(draw, font, text, width, bar_height, offset, color): left, top, right, bottom = font.getbbox(text) text_width, text_height = right - left, bottom - top draw.rectangle([(offset, 0), (offset + width, bar_height)], fill=color) - draw.text((offset + (width - text_width) / 2, text_height - text_height / 2), text, fill="black", font=font) + draw.text( + (offset + (width - text_width) / 2, text_height - text_height / 2), + text, + fill="black", + font=font, + ) bar_height = 18 width = 0 diff --git a/samples/python/efficientnet/build_engine.py b/samples/python/efficientnet/build_engine.py index a4d75552..683c567c 100644 --- a/samples/python/efficientnet/build_engine.py +++ b/samples/python/efficientnet/build_engine.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -56,7 +56,10 @@ def set_image_batcher(self, image_batcher: ImageBatcher): :param image_batcher: The ImageBatcher object """ self.image_batcher = image_batcher - size = int(np.dtype(self.image_batcher.dtype).itemsize * np.prod(self.image_batcher.shape)) + size = int( + np.dtype(self.image_batcher.dtype).itemsize + * np.prod(self.image_batcher.shape) + ) self.batch_allocation = common.cuda_call(cudart.cudaMalloc(size)) self.batch_generator = self.image_batcher.get_batch() @@ -81,8 +84,14 @@ def get_batch(self, names): return None try: batch, _ = next(self.batch_generator) - log.info("Calibrating image {} / {}".format(self.image_batcher.image_index, self.image_batcher.num_images)) - common.memcpy_host_to_device(self.batch_allocation, np.ascontiguousarray(batch)) + log.info( + "Calibrating image {} / {}".format( + self.image_batcher.image_index, self.image_batcher.num_images + ) + ) + common.memcpy_host_to_device( + self.batch_allocation, np.ascontiguousarray(batch) + ) return [int(self.batch_allocation)] except StopIteration: log.info("Finished calibration batches") @@ -127,7 +136,9 @@ def __init__(self, verbose=False): self.builder = trt.Builder(self.trt_logger) self.config = self.builder.create_builder_config() - self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 8 * (2 ** 30)) # 8 GB + self.config.set_memory_pool_limit( + trt.MemoryPoolType.WORKSPACE, 8 * (2**30) + ) # 8 GB self.batch_size = None self.network = None @@ -156,9 +167,17 @@ def create_network(self, onnx_path): log.info("Network Description") for input in inputs: self.batch_size = input.shape[0] - log.info("Input '{}' with shape {} and dtype {}".format(input.name, input.shape, input.dtype)) + log.info( + "Input '{}' with shape {} and dtype {}".format( + input.name, input.shape, input.dtype + ) + ) for output in outputs: - log.info("Output '{}' with shape {} and dtype 
{}".format(output.name, output.shape, output.dtype)) + log.info( + "Output '{}' with shape {} and dtype {}".format( + output.name, output.shape, output.dtype + ) + ) assert self.batch_size > 0 def create_engine( @@ -254,8 +273,12 @@ def main(args): choices=["fp32", "fp16", "int8"], help="The precision mode to build in, either 'fp32', 'fp16' or 'int8', default: 'fp16'", ) - parser.add_argument("-v", "--verbose", action="store_true", help="Enable more verbose log output") - parser.add_argument("--calib_input", help="The directory holding images to use for calibration") + parser.add_argument( + "-v", "--verbose", action="store_true", help="Enable more verbose log output" + ) + parser.add_argument( + "--calib_input", help="The directory holding images to use for calibration" + ) parser.add_argument( "--calib_cache", default="./calibration.cache", @@ -268,7 +291,10 @@ def main(args): help="The maximum number of images to use for calibration, default: 25000", ) parser.add_argument( - "--calib_batch_size", default=8, type=int, help="The batch size for the calibration process, default: 1" + "--calib_batch_size", + default=8, + type=int, + help="The batch size for the calibration process, default: 1", ) parser.add_argument( "--calib_preprocessor", @@ -288,6 +314,8 @@ def main(args): sys.exit(1) if args.precision == "int8" and not any([args.calib_input, args.calib_cache]): parser.print_help() - log.error("When building in int8 precision, either --calib_input or --calib_cache are required") + log.error( + "When building in int8 precision, either --calib_input or --calib_cache are required" + ) sys.exit(1) main(args) diff --git a/samples/python/efficientnet/compare_tf.py b/samples/python/efficientnet/compare_tf.py index 6d9ad88f..2671572e 100644 --- a/samples/python/efficientnet/compare_tf.py +++ b/samples/python/efficientnet/compare_tf.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -91,7 +91,10 @@ def main(args): trt_infer = TensorRTInfer(args.engine) batcher = ImageBatcher( - args.input, *trt_infer.input_spec(), max_num_images=args.num_images, preprocessor=args.preprocessor + args.input, + *trt_infer.input_spec(), + max_num_images=args.num_images, + preprocessor=args.preprocessor ) # Make sure both systems use the same input spec, so we can use the exact same image batches with both @@ -101,14 +104,20 @@ def main(args): print("Input datatype does not match") print("TRT Engine Input Dtype: {} {}".format(trt_dtype)) print("TF Saved Model Input Dtype: {} {}".format(tf_dtype)) - print("Please use the same TensorFlow saved model that the TensorRT engine was built with") + print( + "Please use the same TensorFlow saved model that the TensorRT engine was built with" + ) sys.exit(1) - if (tf_shape[1] and trt_shape[1] != tf_shape[1]) or (tf_shape[2] and trt_shape[2] != tf_shape[2]): + if (tf_shape[1] and trt_shape[1] != tf_shape[1]) or ( + tf_shape[2] and trt_shape[2] != tf_shape[2] + ): print("Input shapes do not match") print("TRT Engine Input Shape: {} {}".format(trt_shape[1:])) print("TF Saved Model Input Shape: {} {}".format(tf_shape[1:])) - print("Please use the same TensorFlow saved model that the TensorRT engine was built with") + print( + "Please use the same TensorFlow saved model that the TensorRT engine was built with" + ) sys.exit(1) match = 0 @@ -131,24 +140,40 @@ def main(args): print( "Processing {} / {} images: {:.2f}% match ".format( - batcher.image_index, batcher.num_images, (100 * (match / batcher.image_index)) + batcher.image_index, + batcher.num_images, + (100 * (match / batcher.image_index)), ), end="\r", ) print() pc = 100 * (match / batcher.num_images) - print("Matching Top-1 class predictions for {} out of {} 
images: {:.2f}%".format(match, batcher.num_images, pc)) + print( + "Matching Top-1 class predictions for {} out of {} images: {:.2f}%".format( + match, batcher.num_images, pc + ) + ) avgerror = np.sqrt(error / batcher.num_images) - print("RMSE between TensorFlow and TensorRT confidence scores: {:.3f}".format(avgerror)) + print( + "RMSE between TensorFlow and TensorRT confidence scores: {:.3f}".format( + avgerror + ) + ) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with") - parser.add_argument("-m", "--saved_model", help="The TensorFlow saved model path to validate against") parser.add_argument( - "-i", "--input", help="The input to infer, either a single image path, or a directory of images" + "-m", + "--saved_model", + help="The TensorFlow saved model path to validate against", + ) + parser.add_argument( + "-i", + "--input", + help="The input to infer, either a single image path, or a directory of images", ) parser.add_argument( "-n", diff --git a/samples/python/efficientnet/create_onnx.py b/samples/python/efficientnet/create_onnx.py index b98fd137..c0e7d109 100644 --- a/samples/python/efficientnet/create_onnx.py +++ b/samples/python/efficientnet/create_onnx.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -32,12 +32,18 @@ def main(args): # Load saved model saved_model_path = os.path.realpath(args.saved_model) assert os.path.isdir(saved_model_path) - graph_def, inputs, outputs = tf_loader.from_saved_model(saved_model_path, None, None, "serve", ["serving_default"]) + graph_def, inputs, outputs = tf_loader.from_saved_model( + saved_model_path, None, None, "serve", ["serving_default"] + ) with tf.Graph().as_default() as tf_graph: tf.import_graph_def(graph_def, name="") with tf_loader.tf_session(graph=tf_graph): - onnx_graph = tfonnx.process_tf_graph(tf_graph, input_names=inputs, output_names=outputs, opset=11) - onnx_model = optimizer.optimize_graph(onnx_graph).make_model("Converted from {}".format(saved_model_path)) + onnx_graph = tfonnx.process_tf_graph( + tf_graph, input_names=inputs, output_names=outputs, opset=11 + ) + onnx_model = optimizer.optimize_graph(onnx_graph).make_model( + "Converted from {}".format(saved_model_path) + ) graph = gs.import_onnx(onnx_model) assert graph print() @@ -55,11 +61,21 @@ def main(args): # Format NCHW graph.inputs[0].shape[2] = args.input_size graph.inputs[0].shape[3] = args.input_size - print("ONNX input named '{}' with shape {}".format(graph.inputs[0].name, graph.inputs[0].shape)) - print("ONNX output named '{}' with shape {}".format(graph.outputs[0].name, graph.outputs[0].shape)) + print( + "ONNX input named '{}' with shape {}".format( + graph.inputs[0].name, graph.inputs[0].shape + ) + ) + print( + "ONNX output named '{}' with shape {}".format( + graph.outputs[0].name, graph.outputs[0].shape + ) + ) for i in range(4): if type(graph.inputs[0].shape[i]) != int or graph.inputs[0].shape[i] <= 0: - print("The input shape of the graph is invalid, try overriding it by giving a fixed size with --input_size") + print( + "The input shape of the graph is invalid, try overriding it by giving a fixed size with --input_size" + ) 
sys.exit(1) # Fix Clip Nodes (ReLU6) @@ -85,9 +101,13 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("-m", "--saved_model", help="The TensorFlow saved model directory to load") + parser.add_argument( + "-m", "--saved_model", help="The TensorFlow saved model directory to load" + ) parser.add_argument("-o", "--onnx", help="The output ONNX model file to write") - parser.add_argument("-b", "--batch_size", type=int, default=1, help="Set the batch size, default: 1") + parser.add_argument( + "-b", "--batch_size", type=int, default=1, help="Set the batch size, default: 1" + ) parser.add_argument( "-i", "--input_size", diff --git a/samples/python/efficientnet/eval_gt.py b/samples/python/efficientnet/eval_gt.py index 14d5a8d1..9f57aaa5 100644 --- a/samples/python/efficientnet/eval_gt.py +++ b/samples/python/efficientnet/eval_gt.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,17 +24,25 @@ from infer import TensorRTInfer from image_batcher import ImageBatcher + def main(args): annotations = {} for line in open(args.annotations, "r"): line = line.strip().split(args.separator) if len(line) < 2 or not line[1].isnumeric(): - print("Could not parse the annotations file correctly, make sure the correct separator is used") + print( + "Could not parse the annotations file correctly, make sure the correct separator is used" + ) sys.exit(1) annotations[os.path.basename(line[0])] = int(line[1]) trt_infer = TensorRTInfer(args.engine) - batcher = ImageBatcher(args.input, *trt_infer.input_spec(), max_num_images=args.num_images, preprocessor=args.preprocessor) + batcher = ImageBatcher( + args.input, + *trt_infer.input_spec(), + max_num_images=args.num_images, + preprocessor=args.preprocessor + ) top1 = 0 top5 = 0 total = 0 @@ -70,9 +78,15 @@ def main(args): parser = argparse.ArgumentParser() parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with") parser.add_argument( - "-i", "--input", help="The input to infer, either a single image path, or a directory of images" + "-i", + "--input", + help="The input to infer, either a single image path, or a directory of images", + ) + parser.add_argument( + "-a", + "--annotations", + help="Set the file to use for classification ground truth annotations", ) - parser.add_argument("-a", "--annotations", help="Set the file to use for classification ground truth annotations") parser.add_argument( "-s", "--separator", diff --git a/samples/python/efficientnet/image_batcher.py b/samples/python/efficientnet/image_batcher.py index 996a72a3..63d37784 100644 --- a/samples/python/efficientnet/image_batcher.py +++ b/samples/python/efficientnet/image_batcher.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,7 +27,15 @@ class ImageBatcher: Creates batches of pre-processed images. """ - def __init__(self, input, shape, dtype, max_num_images=None, exact_batches=False, preprocessor="V2"): + def __init__( + self, + input, + shape, + dtype, + max_num_images=None, + exact_batches=False, + preprocessor="V2", + ): """ :param input: The input directory to read images from. :param shape: The tensor shape of the batch to prepare, either in NCHW or NHWC format. @@ -45,10 +53,16 @@ def __init__(self, input, shape, dtype, max_num_images=None, exact_batches=False extensions = [".jpg", ".jpeg", ".png", ".bmp"] def is_image(path): - return os.path.isfile(path) and os.path.splitext(path)[1].lower() in extensions + return ( + os.path.isfile(path) and os.path.splitext(path)[1].lower() in extensions + ) if os.path.isdir(input): - self.images = [os.path.join(input, f) for f in os.listdir(input) if is_image(os.path.join(input, f))] + self.images = [ + os.path.join(input, f) + for f in os.listdir(input) + if is_image(os.path.join(input, f)) + ] self.images.sort() elif os.path.isfile(input): if is_image(input): diff --git a/samples/python/efficientnet/infer.py b/samples/python/efficientnet/infer.py index 2c70b14e..cc18e1c8 100644 --- a/samples/python/efficientnet/infer.py +++ b/samples/python/efficientnet/infer.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -110,7 +110,9 @@ def infer(self, batch, top=1): output = np.zeros(*self.output_spec()) # Process I/O and execute the network - common.memcpy_host_to_device(self.inputs[0]["allocation"], np.ascontiguousarray(batch)) + common.memcpy_host_to_device( + self.inputs[0]["allocation"], np.ascontiguousarray(batch) + ) self.context.execute_v2(self.allocations) common.memcpy_device_to_host(output, self.outputs[0]["allocation"]) @@ -126,7 +128,9 @@ def infer(self, batch, top=1): def main(args): trt_infer = TensorRTInfer(args.engine) - batcher = ImageBatcher(args.input, *trt_infer.input_spec(), preprocessor=args.preprocessor) + batcher = ImageBatcher( + args.input, *trt_infer.input_spec(), preprocessor=args.preprocessor + ) for batch, images in batcher.get_batch(): classes, scores, top = trt_infer.infer(batch) for i in range(len(images)): @@ -146,10 +150,16 @@ def main(args): parser = argparse.ArgumentParser() parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with") parser.add_argument( - "-i", "--input", help="The input to infer, either a single image path, or a directory of images" + "-i", + "--input", + help="The input to infer, either a single image path, or a directory of images", ) parser.add_argument( - "-t", "--top", default=1, type=int, help="The amount of top classes and scores to output per image, default: 1" + "-t", + "--top", + default=1, + type=int, + help="The amount of top classes and scores to output per image, default: 1", ) parser.add_argument( "-s", diff --git a/samples/python/engine_refit_onnx_bidaf/build_and_refit_engine.py b/samples/python/engine_refit_onnx_bidaf/build_and_refit_engine.py index 268a5cf5..240f1295 100644 --- a/samples/python/engine_refit_onnx_bidaf/build_and_refit_engine.py +++ b/samples/python/engine_refit_onnx_bidaf/build_and_refit_engine.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: 
Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,25 +20,25 @@ import sys import numpy as np - +import argparse import tensorrt as trt -from data_processing import get_inputs, preprocess sys.path.insert(1, os.path.join(sys.path[0], "..")) -import common +from cuda import cudart TRT_LOGGER = trt.Logger() -def get_engine(onnx_file_path, engine_file_path): +def get_plan(onnx_file_path, engine_file_path, version_compatible): """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it.""" - def build_engine(): + def build_plan(): """Takes an ONNX file and creates a TensorRT engine to run inference with""" + import tensorrt as trt + builder = trt.Builder(TRT_LOGGER) - network = builder.create_network(common.EXPLICIT_BATCH) + network = builder.create_network(0) parser = trt.OnnxParser(network, TRT_LOGGER) - runtime = trt.Runtime(TRT_LOGGER) # Parse model file print("Loading ONNX file from path {}...".format(onnx_file_path)) @@ -59,8 +59,8 @@ def build_engine(): config = builder.create_builder_config() config.set_flag(trt.BuilderFlag.REFIT) - config.max_workspace_size = 1 << 28 # 256MiB - + if version_compatible: + config.set_flag(trt.BuilderFlag.VERSION_COMPATIBLE) for opt in [6, 10]: profile = builder.create_optimization_profile() @@ -68,47 +68,119 @@ def build_engine(): input0_min = (1, 1) input0_opt = (opt, 1) input0_max = (15, 1) - profile.set_shape(network.get_input(0).name, min=input0_min, opt=input0_opt, max=input0_max) + profile.set_shape( + network.get_input(0).name, + min=input0_min, + opt=input0_opt, + max=input0_max, + ) input1_min = (1, 1, 1, 16) input1_opt = (opt, 1, 1, 16) input1_max = (15, 1, 1, 16) - profile.set_shape(network.get_input(1).name, min=input1_min, 
opt=input1_opt, max=input1_max) + profile.set_shape( + network.get_input(1).name, + min=input1_min, + opt=input1_opt, + max=input1_max, + ) input2_min = (1, 1) input2_opt = (opt, 1) input2_max = (15, 1) - profile.set_shape(network.get_input(2).name, min=input2_min, opt=input2_opt, max=input2_max) + profile.set_shape( + network.get_input(2).name, + min=input2_min, + opt=input2_opt, + max=input2_max, + ) input3_min = (1, 1, 1, 16) input3_opt = (opt, 1, 1, 16) input3_max = (15, 1, 1, 16) - profile.set_shape(network.get_input(3).name, min=input3_min, opt=input3_opt, max=input3_max) + profile.set_shape( + network.get_input(3).name, + min=input3_min, + opt=input3_opt, + max=input3_max, + ) config.add_optimization_profile(profile) - print("Building an engine from file {}; this may take a while...".format(onnx_file_path)) + print( + "Building an engine from file {}; this may take a while...".format( + onnx_file_path + ) + ) plan = builder.build_serialized_network(network, config) - engine = runtime.deserialize_cuda_engine(plan) print("Completed creating Engine") with open(engine_file_path, "wb") as f: f.write(plan) - return engine + return plan if os.path.exists(engine_file_path): # If a serialized engine exists, use it instead of building an engine. 
- print("Reading engine from file {}".format(engine_file_path)) - with open(engine_file_path, "rb") as f: - runtime = trt.Runtime(TRT_LOGGER) - return runtime.deserialize_cuda_engine(f.read()) - else: - return build_engine() + print("Reading engine from file {}...".format(engine_file_path)) + f = open(engine_file_path, "rb") + return f.read() + return build_plan() def main(): + global trt + global TRT_LOGGER + + parser = argparse.ArgumentParser() + parser.add_argument( + "-l", + "--weights-location", + dest="weights_location", + default="GPU", + choices=["GPU", "CPU"], + help="The location for weights passed to refitter, either GPU/CPU, default: GPU", + ) + parser.add_argument( + "--version-compatible", + dest="version_compatible", + action="store_true", + help="Build a version compatible engine for refitting", + ) + args = parser.parse_args() + onnx_file_path = "bidaf-modified.onnx" - engine_file_path = "bidaf.trt" + engine_file_path = "bidaf{}.trt".format("-vc" if args.version_compatible else "") + + plan = get_plan(onnx_file_path, engine_file_path, args.version_compatible) + + if args.version_compatible: + # Try using dispatch runtime for refitting and inference. If failed, fallback to full runtime. + try: + del sys.modules["tensorrt"] + sys.modules["tensorrt"] = __import__("tensorrt_dispatch") + sys.modules["trt"] = sys.modules["tensorrt"] + import tensorrt_dispatch as trt + + print( + "Importing tensorrt_dispatch instead of full tensorrt for refitting and running vc engines." + ) + except: + print( + "Failed to import tensorrt_dispatch for refitting and running vc engines. Please install the package first!" 
+ ) + sys.modules["tensorrt"] = __import__("tensorrt") + TRT_LOGGER = trt.Logger() + + engine = None + with open(engine_file_path, "rb") as f: + runtime = trt.Runtime(TRT_LOGGER) + if args.version_compatible: + runtime.engine_host_code_allowed = True + engine = runtime.deserialize_cuda_engine(plan) + + # should be after get_engine + from data_processing import get_inputs, preprocess + import common_runtime as common # input context = "A quick brown fox jumps over the lazy dog." @@ -119,50 +191,93 @@ def main(): # Do inference with TensorRT weights_names = ["Parameter576_B_0", "W_0"] - refit_weights_dict = {name: np.load("{}.npy".format(name)) for name in weights_names} - fake_weights_dict = {name: np.ones_like(weights) for name, weights in refit_weights_dict.items()} - engine = get_engine(onnx_file_path, engine_file_path) + refit_weights_dict = { + name: np.load("{}.npy".format(name)) for name in weights_names + } + fake_weights_dict = { + name: np.ones_like(weights) for name, weights in refit_weights_dict.items() + } + device_mem_dict = {} + if args.weights_location == "GPU": + for name, weights in refit_weights_dict.items(): + nbytes = weights.size * weights.itemsize + device_mem_dict[name] = common.cuda_call(cudart.cudaMalloc(nbytes)) + + execution_context = engine.create_execution_context() refitter = trt.Refitter(engine, TRT_LOGGER) - for weights_dict, answer_correct in [(fake_weights_dict, False), (refit_weights_dict, True)]: - print("Refitting engine...") - # To get a list of all refittable weights' names - # in the network, use refitter.get_all_weights(). - + # Skip weights validation since we are confident that the new weights are similar to the weights used to build engine. + refitter.weights_validation = False + # To get a list of all refittable weights' names + # in the network, use refitter.get_all_weights(). 
+ + if args.weights_location == "GPU": + for name, device_mem in device_mem_dict.items(): + device_weights = trt.Weights( + trt.DataType.FLOAT, device_mem, refit_weights_dict[name].size + ) + weights_prototype = refitter.get_weights_prototype(name) + assert device_weights.dtype == weights_prototype.dtype + assert device_weights.size == weights_prototype.size + refitter.set_named_weights(name, device_weights, trt.TensorLocation.DEVICE) + + for weights_dict, answer_correct in [ + (fake_weights_dict, False), + (refit_weights_dict, True), + ]: + import time + + T1 = time.perf_counter() + device_mem_list = [] # Refit named weights via set_named_weights for name in weights_names: - refitter.set_named_weights(name, weights_dict[name]) - - # Get missing weights names. This should return empty - # lists in this case. + host_weights = weights_dict[name] + if args.weights_location == "CPU": + weights = host_weights + location = trt.TensorLocation.HOST + refitter.set_named_weights(name, weights, location) + else: + common.memcpy_host_to_device(device_mem_dict[name], host_weights) + + # Get missing weights names. This should return empty lists in this case. missing_weights = refitter.get_missing_weights() assert ( len(missing_weights) == 0 ), "Refitter found missing weights. Call set_named_weights() or set_weights() for all missing weights" - # Refit the engine with the new weights. This will return True if - # the refit operation succeeded. + + print(f"Refitting engine from {args.weights_location} weights...") + # Refit the engine with the new weights. This will return True if the refit operation succeeded. 
assert refitter.refit_cuda_engine() + T2 = time.perf_counter() + print("Engine refitted in {:.2f} ms.".format((T2 - T1) * 1000)) + for profile_idx in range(engine.num_optimization_profiles): print("Doing inference...") # Do inference - inputs, outputs, bindings, stream = common.allocate_buffers(engine, profile_idx) + inputs, outputs, bindings, stream = common.allocate_buffers( + engine, profile_idx + ) padding_bindings = [0] * (len(bindings) * profile_idx) new_bindings = padding_bindings + bindings - # Set host input. The common.do_inference_v2 function will copy the input to the GPU before executing. + # Set host input. The common.do_inference function will copy the input to the GPU before executing. inputs[0].host = cw inputs[1].host = cc inputs[2].host = qw inputs[3].host = qc - execution_context = engine.create_execution_context() execution_context.set_optimization_profile_async(profile_idx, stream) execution_context.set_input_shape("CategoryMapper_4", (10, 1)) execution_context.set_input_shape("CategoryMapper_5", (10, 1, 1, 16)) execution_context.set_input_shape("CategoryMapper_6", (6, 1)) execution_context.set_input_shape("CategoryMapper_7", (6, 1, 1, 16)) - trt_outputs = common.do_inference_v2( - execution_context, bindings=new_bindings, inputs=inputs, outputs=outputs, stream=stream + trt_outputs = common.do_inference( + execution_context, + engine=engine, + bindings=bindings, + inputs=inputs, + outputs=outputs, + stream=stream, ) start = trt_outputs[0].item() @@ -170,6 +285,10 @@ def main(): answer = [w.encode() for w in cw_str[start : end + 1].reshape(-1)] assert answer_correct == (answer == [b"brown"]), answer common.free_buffers(inputs, outputs, stream) + + for _, device_mem in device_mem_dict.items(): + common.cuda_call(cudart.cudaFree(device_mem)) + print("Passed") diff --git a/samples/python/engine_refit_onnx_bidaf/data_processing.py b/samples/python/engine_refit_onnx_bidaf/data_processing.py index 6eb90fa0..f6740bc5 100644 --- 
a/samples/python/engine_refit_onnx_bidaf/data_processing.py +++ b/samples/python/engine_refit_onnx_bidaf/data_processing.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,7 +40,9 @@ def preprocess(text): def get_map_func(filepath): file = open(filepath) category_map = json.load(file) - category_mapper = dict(zip(category_map["cats_strings"], category_map["cats_int64s"])) + category_mapper = dict( + zip(category_map["cats_strings"], category_map["cats_int64s"]) + ) default_int64 = category_map["default_int64"] func = lambda s: category_mapper.get(s, default_int64) return np.vectorize(func) diff --git a/samples/python/engine_refit_onnx_bidaf/prepare_model.py b/samples/python/engine_refit_onnx_bidaf/prepare_model.py index cbeb6a92..eb45226e 100644 --- a/samples/python/engine_refit_onnx_bidaf/prepare_model.py +++ b/samples/python/engine_refit_onnx_bidaf/prepare_model.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -82,7 +82,9 @@ def save_weights_for_refitting(graph): def main(): - org_model_file_path = getFilePath("samples/python/engine_refit_onnx_bidaf/bidaf-original.onnx") + org_model_file_path = getFilePath( + "samples/python/engine_refit_onnx_bidaf/bidaf-original.onnx" + ) print("Modifying the ONNX model ...") original_model = onnx.load(org_model_file_path) diff --git a/samples/python/introductory_parser_samples/onnx_resnet50.py b/samples/python/introductory_parser_samples/onnx_resnet50.py index f07e99ff..fd69cc48 100644 --- a/samples/python/introductory_parser_samples/onnx_resnet50.py +++ b/samples/python/introductory_parser_samples/onnx_resnet50.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,6 +40,7 @@ class ModelData(object): # You can set the logger severity higher to suppress messages (or lower to display more messages). TRT_LOGGER = trt.Logger(trt.Logger.WARNING) + # The Onnx path is used for Onnx models. def build_engine_onnx(model_file): builder = trt.Builder(TRT_LOGGER) @@ -111,7 +112,14 @@ def main(): test_case = load_normalized_test_case(test_image, inputs[0].host) # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the # probability that the image corresponds to that label - trt_outputs = common.do_inference(context, engine=engine, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) + trt_outputs = common.do_inference( + context, + engine=engine, + bindings=bindings, + inputs=inputs, + outputs=outputs, + stream=stream, + ) # We use the highest probability as our prediction. 
Its index corresponds to the predicted label. pred = labels[np.argmax(trt_outputs[0])] common.free_buffers(inputs, outputs, stream) diff --git a/samples/python/network_api_pytorch_mnist/model.py b/samples/python/network_api_pytorch_mnist/model.py index 3f1a4fe4..53371989 100644 --- a/samples/python/network_api_pytorch_mnist/model.py +++ b/samples/python/network_api_pytorch_mnist/model.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -59,7 +59,9 @@ def __init__(self): "/tmp/mnist/data", train=True, download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]), + transform=transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ), ), batch_size=self.batch_size, shuffle=True, @@ -70,7 +72,9 @@ def __init__(self): datasets.MNIST( "/tmp/mnist/data", train=False, - transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]), + transform=transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ), ), batch_size=self.test_batch_size, shuffle=True, @@ -86,7 +90,11 @@ def learn(self, num_epochs=2): # Train the network for a single epoch def train(epoch): self.network.train() - optimizer = optim.SGD(self.network.parameters(), lr=self.learning_rate, momentum=self.sgd_momentum) + optimizer = optim.SGD( + self.network.parameters(), + lr=self.learning_rate, + momentum=self.sgd_momentum, + ) for batch, (data, target) in enumerate(self.train_loader): if torch.cuda.is_available(): data = data.to("cuda") @@ -126,7 +134,10 @@ def test(epoch): test_loss /= len(self.test_loader) print( "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} 
({:.0f}%)\n".format( - test_loss, correct, len(self.test_loader.dataset), 100.0 * correct / len(self.test_loader.dataset) + test_loss, + correct, + len(self.test_loader.dataset), + 100.0 * correct / len(self.test_loader.dataset), ) ) diff --git a/samples/python/network_api_pytorch_mnist/sample.py b/samples/python/network_api_pytorch_mnist/sample.py index 1f634443..a695ee9a 100644 --- a/samples/python/network_api_pytorch_mnist/sample.py +++ b/samples/python/network_api_pytorch_mnist/sample.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -41,7 +41,9 @@ class ModelData(object): def populate_network(network, weights): # Configure the network layers based on the weights provided. - input_tensor = network.add_input(name=ModelData.INPUT_NAME, dtype=ModelData.DTYPE, shape=ModelData.INPUT_SHAPE) + input_tensor = network.add_input( + name=ModelData.INPUT_NAME, dtype=ModelData.DTYPE, shape=ModelData.INPUT_SHAPE + ) def add_matmul_as_fc(net, input, outputs, w, b): assert len(input.shape) >= 3 @@ -64,7 +66,9 @@ def add_matmul_as_fc(net, input, outputs, w, b): ) bias_const = net.add_constant(trt.Dims2(1, n), b) - bias_add = net.add_elementwise(mm.get_output(0), bias_const.get_output(0), trt.ElementWiseOperation.SUM) + bias_add = net.add_elementwise( + mm.get_output(0), bias_const.get_output(0), trt.ElementWiseOperation.SUM + ) output_reshape = net.add_shuffle(bias_add.get_output(0)) output_reshape.reshape_dims = trt.Dims4(m, n, 1, 1) @@ -73,16 +77,24 @@ def add_matmul_as_fc(net, input, outputs, w, b): conv1_w = weights["conv1.weight"].cpu().numpy() conv1_b = weights["conv1.bias"].cpu().numpy() conv1 = network.add_convolution_nd( - input=input_tensor, num_output_maps=20, kernel_shape=(5, 5), 
kernel=conv1_w, bias=conv1_b + input=input_tensor, + num_output_maps=20, + kernel_shape=(5, 5), + kernel=conv1_w, + bias=conv1_b, ) conv1.stride_nd = (1, 1) - pool1 = network.add_pooling_nd(input=conv1.get_output(0), type=trt.PoolingType.MAX, window_size=(2, 2)) + pool1 = network.add_pooling_nd( + input=conv1.get_output(0), type=trt.PoolingType.MAX, window_size=(2, 2) + ) pool1.stride_nd = trt.Dims2(2, 2) conv2_w = weights["conv2.weight"].cpu().numpy() conv2_b = weights["conv2.bias"].cpu().numpy() - conv2 = network.add_convolution_nd(pool1.get_output(0), 50, (5, 5), conv2_w, conv2_b) + conv2 = network.add_convolution_nd( + pool1.get_output(0), 50, (5, 5), conv2_w, conv2_b + ) conv2.stride_nd = (1, 1) pool2 = network.add_pooling_nd(conv2.get_output(0), trt.PoolingType.MAX, (2, 2)) @@ -92,11 +104,15 @@ def add_matmul_as_fc(net, input, outputs, w, b): fc1_b = weights["fc1.bias"].cpu().numpy() fc1 = add_matmul_as_fc(network, pool2.get_output(0), 500, fc1_w, fc1_b) - relu1 = network.add_activation(input=fc1.get_output(0), type=trt.ActivationType.RELU) + relu1 = network.add_activation( + input=fc1.get_output(0), type=trt.ActivationType.RELU + ) fc2_w = weights["fc2.weight"].cpu().numpy() fc2_b = weights["fc2.bias"].cpu().numpy() - fc2 = add_matmul_as_fc(network, relu1.get_output(0), ModelData.OUTPUT_SIZE, fc2_w, fc2_b) + fc2 = add_matmul_as_fc( + network, relu1.get_output(0), ModelData.OUTPUT_SIZE, fc2_w, fc2_b + ) fc2.get_output(0).name = ModelData.OUTPUT_NAME network.mark_output(tensor=fc2.get_output(0)) @@ -143,7 +159,14 @@ def main(): case_num = load_random_test_case(mnist_model, pagelocked_buffer=inputs[0].host) # For more information on performing inference, refer to the introductory samples. # The common.do_inference function will return a list of outputs - we only have one in this case. 
- [output] = common.do_inference(context, engine=engine, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) + [output] = common.do_inference( + context, + engine=engine, + bindings=bindings, + inputs=inputs, + outputs=outputs, + stream=stream, + ) pred = np.argmax(output) common.free_buffers(inputs, outputs, stream) print("Test Case: " + str(case_num)) diff --git a/samples/python/onnx_custom_plugin/CMakeLists.txt b/samples/python/onnx_custom_plugin/CMakeLists.txt index 75f69af4..f00bcd31 100644 --- a/samples/python/onnx_custom_plugin/CMakeLists.txt +++ b/samples/python/onnx_custom_plugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/python/onnx_custom_plugin/load_plugin_lib.py b/samples/python/onnx_custom_plugin/load_plugin_lib.py index 0a85f18e..a3feaa37 100644 --- a/samples/python/onnx_custom_plugin/load_plugin_lib.py +++ b/samples/python/onnx_custom_plugin/load_plugin_lib.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,7 +18,9 @@ import os import ctypes -WORKING_DIR = os.environ.get("TRT_WORKING_DIR") or os.path.dirname(os.path.realpath(__file__)) +WORKING_DIR = os.environ.get("TRT_WORKING_DIR") or os.path.dirname( + os.path.realpath(__file__) +) IS_WINDOWS = os.name == "nt" if IS_WINDOWS: HARDMAX_PLUGIN_LIBRARY_NAME = "customHardmaxPlugin.dll" @@ -28,7 +30,10 @@ ] else: HARDMAX_PLUGIN_LIBRARY_NAME = "libcustomHardmaxPlugin.so" - HARDMAX_PLUGIN_LIBRARY = [os.path.join(WORKING_DIR, "build", HARDMAX_PLUGIN_LIBRARY_NAME)] + HARDMAX_PLUGIN_LIBRARY = [ + os.path.join(WORKING_DIR, "build", HARDMAX_PLUGIN_LIBRARY_NAME) + ] + def load_plugin_lib(): for plugin_lib in HARDMAX_PLUGIN_LIBRARY: diff --git a/samples/python/onnx_custom_plugin/model.py b/samples/python/onnx_custom_plugin/model.py index cde029c5..53b2a96e 100644 --- a/samples/python/onnx_custom_plugin/model.py +++ b/samples/python/onnx_custom_plugin/model.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,18 +24,21 @@ MODEL_URL = "https://github.com/onnx/models/raw/e77240a62df68ed13e3138a5812553a552b857bb/text/machine_comprehension/bidirectional_attention_flow/model/bidaf-9.onnx" -WORKING_DIR = os.environ.get("TRT_WORKING_DIR") or os.path.dirname(os.path.realpath(__file__)) -MODEL_DIR = os.path.join(WORKING_DIR, "models") +WORKING_DIR = os.environ.get("TRT_WORKING_DIR") or os.path.dirname( + os.path.realpath(__file__) +) +MODEL_DIR = os.path.join(WORKING_DIR, "models") RAW_MODEL_PATH = os.path.join(MODEL_DIR, "bidaf-9.onnx") TRT_MODEL_PATH = os.path.join(MODEL_DIR, "bidaf-9-trt.onnx") + def _do_graph_surgery(raw_model_path, trt_model_path): graph = gs.import_onnx(onnx.load(raw_model_path)) # Replace unsupported Hardmax with our CustomHardmax op for node in graph.nodes: - if node.op == 'Hardmax': - node.op = 'CustomHardmax' + if node.op == "Hardmax": + node.op = "CustomHardmax" hardmax_node = node # The original onnx model also uses another unsupported op called "Compress". @@ -47,16 +50,16 @@ def _do_graph_surgery(raw_model_path, trt_model_path): # # So, we will replace the subgraph Compress(Transpose_29, Cast(Reshape(Hardmax))) # with the subgraph Einsum(Transpose_29, Hardmax) where the equation in Einsum takes the dot product. 
- node_by_name = {node.name : node for node in graph.nodes} - transpose_node = node_by_name['Transpose_29'] - compress_node = node_by_name['Compress_31'] + node_by_name = {node.name: node for node in graph.nodes} + transpose_node = node_by_name["Transpose_29"] + compress_node = node_by_name["Compress_31"] einsum_node = gs.Node( - 'Einsum', - 'Dot_of_Hardmax_and_Transpose', - attrs={'equation': 'ij,ij->i'}, # "Dot product" of 2d tensors + "Einsum", + "Dot_of_Hardmax_and_Transpose", + attrs={"equation": "ij,ij->i"}, # "Dot product" of 2d tensors inputs=[hardmax_node.outputs[0], transpose_node.outputs[0]], - outputs=[compress_node.outputs[0]] + outputs=[compress_node.outputs[0]], ) graph.nodes.append(einsum_node) @@ -80,7 +83,9 @@ def _do_graph_surgery(raw_model_path, trt_model_path): # # Later we will feed the model the integer tokens directly. # Note: list conversion is necessary because we modify graph.nodes in the for loop. - category_mapper_nodes = [node for node in graph.nodes if node.op == 'CategoryMapper'] + category_mapper_nodes = [ + node for node in graph.nodes if node.op == "CategoryMapper" + ] for node in category_mapper_nodes: # Remove CategoryMapper node from onnx graph graph.nodes.remove(node) diff --git a/samples/python/onnx_custom_plugin/sample.py b/samples/python/onnx_custom_plugin/sample.py index 7026f0e5..25d4ca36 100644 --- a/samples/python/onnx_custom_plugin/sample.py +++ b/samples/python/onnx_custom_plugin/sample.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,7 +30,7 @@ # Reuse some BiDAF-specific methods # ../engine_refit_onnx_bidaf/data_processing.py -sys.path.insert(1, os.path.join(parent_dir, 'engine_refit_onnx_bidaf')) +sys.path.insert(1, os.path.join(parent_dir, "engine_refit_onnx_bidaf")) from engine_refit_onnx_bidaf.data_processing import preprocess, get_inputs # Maxmimum number of words in context or query text. @@ -38,10 +38,12 @@ # Adjustable. MAX_TEXT_LENGTH = 64 -WORKING_DIR = os.environ.get("TRT_WORKING_DIR") or os.path.dirname(os.path.realpath(__file__)) +WORKING_DIR = os.environ.get("TRT_WORKING_DIR") or os.path.dirname( + os.path.realpath(__file__) +) # Path to which trained model will be saved (check README.md) -ENGINE_FILE_PATH = os.path.join(WORKING_DIR, 'bidaf.trt') +ENGINE_FILE_PATH = os.path.join(WORKING_DIR, "bidaf.trt") # Define global logger object (it should be a singleton, # available for TensorRT from anywhere in code). @@ -49,13 +51,16 @@ # (or lower to display more messages) TRT_LOGGER = trt.Logger(trt.Logger.WARNING) + # Builds TensorRT Engine def build_engine(model_path): builder = trt.Builder(TRT_LOGGER) network = builder.create_network(0) config = builder.create_builder_config() - config.set_tactic_sources(config.get_tactic_sources() | 1 << int(trt.TacticSource.CUBLAS)) + config.set_tactic_sources( + config.get_tactic_sources() | 1 << int(trt.TacticSource.CUBLAS) + ) parser = trt.OnnxParser(network, TRT_LOGGER) runtime = trt.Runtime(TRT_LOGGER) @@ -90,17 +95,20 @@ def build_engine(model_path): f.write(plan) return engine + def load_test_case(inputs, context_text, query_text, trt_context): # Part 1: Specify Input shapes cw, cc = preprocess(context_text) qw, qc = preprocess(query_text) for arr in (cw, cc, qw, qc): - assert arr.shape[0] <= MAX_TEXT_LENGTH, "Input context or query is too long! 
" + \ - "Either decrease the input length or increase MAX_TEXT_LENGTH" - trt_context.set_input_shape('CategoryMapper_4', cw.shape) - trt_context.set_input_shape('CategoryMapper_5', cc.shape) - trt_context.set_input_shape('CategoryMapper_6', qw.shape) - trt_context.set_input_shape('CategoryMapper_7', qc.shape) + assert arr.shape[0] <= MAX_TEXT_LENGTH, ( + "Input context or query is too long! " + + "Either decrease the input length or increase MAX_TEXT_LENGTH" + ) + trt_context.set_input_shape("CategoryMapper_4", cw.shape) + trt_context.set_input_shape("CategoryMapper_5", cc.shape) + trt_context.set_input_shape("CategoryMapper_6", qw.shape) + trt_context.set_input_shape("CategoryMapper_7", qc.shape) # Part 2: load input data cw_flat, cc_flat, qw_flat, qc_flat = get_inputs(context_text, query_text) @@ -138,20 +146,23 @@ def main(): inputs, outputs, bindings, stream = common.allocate_buffers(engine, profile_idx=0) testcases = [ - ('Garry the lion is 5 years old. He lives in the savanna.', 'Where does the lion live?'), - ('A quick brown fox jumps over the lazy dog.', 'What color is the fox?') + ( + "Garry the lion is 5 years old. 
He lives in the savanna.", + "Where does the lion live?", + ), + ("A quick brown fox jumps over the lazy dog.", "What color is the fox?"), ] print("\n=== Testing ===") - interactive = '--interactive' in sys.argv + interactive = "--interactive" in sys.argv if interactive: context_text = input("Enter context: ") query_text = input("Enter query: ") testcases = [(context_text, query_text)] trt_context = engine.create_execution_context() - for (context_text, query_text) in testcases: + for context_text, query_text in testcases: context_words, _ = preprocess(context_text) @@ -159,7 +170,14 @@ def main(): if not interactive: print(f"Input context: {context_text}") print(f"Input query: {query_text}") - trt_outputs = common.do_inference(trt_context, engine=engine, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) + trt_outputs = common.do_inference( + trt_context, + engine=engine, + bindings=bindings, + inputs=inputs, + outputs=outputs, + stream=stream, + ) start = trt_outputs[1].item() end = trt_outputs[0].item() answer = context_words[start : end + 1].flatten() @@ -168,5 +186,6 @@ def main(): common.free_buffers(inputs, outputs, stream) print("Passed") + if __name__ == "__main__": main() diff --git a/samples/python/onnx_custom_plugin/test_custom_hardmax_plugin.py b/samples/python/onnx_custom_plugin/test_custom_hardmax_plugin.py index c99b78d1..59b08b06 100644 --- a/samples/python/onnx_custom_plugin/test_custom_hardmax_plugin.py +++ b/samples/python/onnx_custom_plugin/test_custom_hardmax_plugin.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,27 +29,35 @@ TRT_LOGGER = trt.Logger(trt.Logger.ERROR) + def hardmax_reference_impl(arr, axis): one_hot = np.zeros(arr.shape, dtype=arr.dtype) argmax = np.expand_dims(np.argmax(arr, axis), axis) - np.put_along_axis(one_hot,argmax,1,axis=axis) + np.put_along_axis(one_hot, argmax, 1, axis=axis) return one_hot + def make_trt_network_and_engine(input_shape, axis): registry = trt.get_plugin_registry() plugin_creator = registry.get_plugin_creator("CustomHardmax", "1") axis_buffer = np.array([axis]) axis_attr = trt.PluginField("axis", axis_buffer, type=trt.PluginFieldType.INT32) field_collection = trt.PluginFieldCollection([axis_attr]) - plugin = plugin_creator.create_plugin(name="CustomHardmax", field_collection=field_collection) + plugin = plugin_creator.create_plugin( + name="CustomHardmax", field_collection=field_collection + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network(0) config = builder.create_builder_config() - config.set_tactic_sources(config.get_tactic_sources() | 1 << int(trt.TacticSource.CUBLAS)) + config.set_tactic_sources( + config.get_tactic_sources() | 1 << int(trt.TacticSource.CUBLAS) + ) runtime = trt.Runtime(TRT_LOGGER) - input_layer = network.add_input(name="input_layer", dtype=trt.float32, shape=input_shape) + input_layer = network.add_input( + name="input_layer", dtype=trt.float32, shape=input_shape + ) hardmax = network.add_plugin_v2(inputs=[input_layer], plugin=plugin) network.mark_output(hardmax.get_output(0)) @@ -58,15 +66,24 @@ def make_trt_network_and_engine(input_shape, axis): return engine + def custom_plugin_impl(input_arr, engine): inputs, outputs, bindings, stream = common.allocate_buffers(engine) context = engine.create_execution_context() inputs[0].host = input_arr.astype(trt.nptype(trt.float32)) - trt_outputs = common.do_inference(context, engine=engine, bindings=bindings, inputs=inputs, 
outputs=outputs, stream=stream) + trt_outputs = common.do_inference( + context, + engine=engine, + bindings=bindings, + inputs=inputs, + outputs=outputs, + stream=stream, + ) output = trt_outputs[0].copy() common.free_buffers(inputs, outputs, stream) return output + def main(): load_plugin_lib() for num_dims in range(1, 8): @@ -80,5 +97,6 @@ def main(): assert np.all(res1 == res2), f"Test failed for shape={shape}, axis={axis}" print("Passed") -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/samples/python/onnx_packnet/convert_to_onnx.py b/samples/python/onnx_packnet/convert_to_onnx.py index df604f96..72c31b72 100644 --- a/samples/python/onnx_packnet/convert_to_onnx.py +++ b/samples/python/onnx_packnet/convert_to_onnx.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -63,17 +63,29 @@ def build_packnet(model_file, args): model_pyt = PackNet01(version="1A") # Convert the model into ONNX - torch.onnx.export(model_pyt, input_pyt, model_file, verbose=args.verbose, opset_version=args.opset) + torch.onnx.export( + model_pyt, input_pyt, model_file, verbose=args.verbose, opset_version=args.opset + ) def main(): parser = argparse.ArgumentParser( description="Exports PackNet01 to ONNX, and post-processes it to insert TensorRT plugins" ) - parser.add_argument("-o", "--output", help="Path to save the generated ONNX model", default="model.onnx") - parser.add_argument("-op", "--opset", type=int, help="ONNX opset to use", default=11) parser.add_argument( - "-v", "--verbose", action="store_true", help="Flag to enable verbose logging for torch.onnx.export" + "-o", + "--output", + help="Path to save the generated ONNX model", + default="model.onnx", + ) + parser.add_argument( + "-op", "--opset", type=int, help="ONNX opset to use", default=11 + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Flag to enable verbose logging for torch.onnx.export", ) args = parser.parse_args() diff --git a/samples/python/onnx_packnet/post_processing.py b/samples/python/onnx_packnet/post_processing.py index 887834c7..33adcf1d 100644 --- a/samples/python/onnx_packnet/post_processing.py +++ b/samples/python/onnx_packnet/post_processing.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,6 +22,7 @@ import numpy as np import torch + # Pad layer subgraph structure in ONNX (specific to opset 11): # Constant # | @@ -65,7 +66,9 @@ def process_pad_nodes(graph): def fold_pad_inputs(node, graph): # Gather the amount of padding in each dimension from pytorch graph. if torch.__version__ < "1.5.0": - pad_values_pyt = node.i(1).i(0).i(0).i(0).i(0).i(0).i(0).i(0).attrs["value"].values + pad_values_pyt = ( + node.i(1).i(0).i(0).i(0).i(0).i(0).i(0).i(0).attrs["value"].values + ) elif torch.__version__ < "2.0.0": pad_values_pyt = node.i(1).i(0).i(0).i(0).i(0).i(0).inputs[0].values else: @@ -80,7 +83,9 @@ def fold_pad_inputs(node, graph): j -= 1 # Change the existing pad tensor to the new onnx_pad values tensor - pads_folded_tensor = gs.Constant(name=node.inputs[1].name, values=np.array(onnx_pad_values)) + pads_folded_tensor = gs.Constant( + name=node.inputs[1].name, values=np.array(onnx_pad_values) + ) node.inputs[1] = pads_folded_tensor @@ -134,7 +139,9 @@ def fold_upsample_inputs(upsample, graph, opset=11): if opset == 9: # Gather the scale factor from mul op in the upsample input subgraph - scale_factor = upsample.i(1).i(1).i(0).i(0).i(0).i(0).i(0).i(0).i(1).attrs["value"].values + scale_factor = ( + upsample.i(1).i(1).i(0).i(0).i(0).i(0).i(0).i(0).i(1).attrs["value"].values + ) # Create the new scales tensor scales = np.array([1.0, 1.0, scale_factor, scale_factor], dtype=np.float32) @@ -148,7 +155,9 @@ def fold_upsample_inputs(upsample, graph, opset=11): sizes_tensor_name = upsample.inputs[3].name # Create the new scales tensor - scale_factor = upsample.i(3).i(1).i().i().i().i().i(0).i(1).attrs["value"].values + scale_factor = ( + upsample.i(3).i(1).i().i().i().i().i(0).i(1).attrs["value"].values + ) scales = np.array([1.0, 1.0, scale_factor, scale_factor], dtype=np.float32) scale_tensor = gs.Constant(name=sizes_tensor_name, values=scales) diff 
--git a/samples/python/python_plugin/CMakeLists.txt b/samples/python/python_plugin/CMakeLists.txt index 3b8fc1f3..6338ea50 100644 --- a/samples/python/python_plugin/CMakeLists.txt +++ b/samples/python/python_plugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/python/python_plugin/circ_pad_plugin_cpp.py b/samples/python/python_plugin/circ_pad_plugin_cpp.py index a820399f..a7cb8d2f 100644 --- a/samples/python/python_plugin/circ_pad_plugin_cpp.py +++ b/samples/python/python_plugin/circ_pad_plugin_cpp.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,14 +29,29 @@ TrtRunner, ) + def parseArgs(): - parser = argparse.ArgumentParser(description="Options for Circular Padding plugin C++ example") + parser = argparse.ArgumentParser( + description="Options for Circular Padding plugin C++ example" + ) - parser.add_argument('--precision', type=str, default="fp32", choices=["fp32", "fp16"], help="Precision to use for plugin") - parser.add_argument('--plugin-lib', type=str, help="Path to the Circular Padding plugin lib", required=True) + parser.add_argument( + "--precision", + type=str, + default="fp32", + choices=["fp32", "fp16"], + help="Precision to use for plugin", + ) + parser.add_argument( + "--plugin-lib", + type=str, + help="Path to the Circular Padding plugin lib", + required=True, + ) return parser.parse_args() + if __name__ == "__main__": args = parseArgs() @@ -67,15 +82,15 @@ def parseArgs(): # build engine build_engine = EngineFromNetwork( - NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision==np.float16) + NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision == np.float16) ) Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap") # Run - with TrtRunner(build_engine, "trt_runner")as runner: + with TrtRunner(build_engine, "trt_runner") as runner: outputs = runner.infer({"X": X}) Y = outputs["Y"] - + if np.allclose(Y, Y_ref): print("Inference result correct!") else: diff --git a/samples/python/python_plugin/circ_pad_plugin_cuda_python.py b/samples/python/python_plugin/circ_pad_plugin_cuda_python.py index 88ad1ff7..212e3e74 100644 --- a/samples/python/python_plugin/circ_pad_plugin_cuda_python.py +++ b/samples/python/python_plugin/circ_pad_plugin_cuda_python.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,14 +24,14 @@ CreateConfig, EngineFromNetwork, NetworkFromOnnxPath, - TrtRunner + TrtRunner, ) from polygraphy.json import to_json, from_json from utils import checkCudaErrors, KernelHelper, parseArgs, CudaCtxManager from cuda import cuda -circ_pad_half_kernel = r''' +circ_pad_half_kernel = r""" #include extern "C" __global__ void circ_pad_half(half const* X, int const* all_pads, int const* orig_dims, half* Y, int const* Y_shape, int Y_len) { @@ -58,9 +58,9 @@ ]; } } -''' +""" -circ_pad_float_kernel = r''' +circ_pad_float_kernel = r""" extern "C" __global__ void circ_pad_float(float const* X, int const* all_pads, int const* orig_dims, float* Y, int const* Y_shape, int Y_len) { int index = blockIdx.x * blockDim.x + threadIdx.x; @@ -86,7 +86,8 @@ ]; } } -''' +""" + class CircPadPlugin(trt.IPluginV2DynamicExt): def __init__(self, fc=None): @@ -107,7 +108,9 @@ def __init__(self, fc=None): self.cuDevice = None if fc is not None: - assert set([f.name for f in fc]) == set(["pads", "N"]), "Field collection invalid" + assert set([f.name for f in fc]) == set( + ["pads", "N"] + ), "Field collection invalid" for f in fc: if f.name == "pads": self.pads = f.data @@ -116,11 +119,17 @@ def __init__(self, fc=None): def initialize(self): err, self.cuDevice = cuda.cuDeviceGet(0) - trt.get_plugin_registry().acquire_plugin_resource("cuda_ctx", CudaCtxManager(self.cuDevice)) - self.all_pads_d = checkCudaErrors(cuda.cuMemAlloc(np.int32().itemsize * self.N * 2)) - self.orig_dims_d = checkCudaErrors(cuda.cuMemAlloc(np.int32().itemsize * self.N)) + trt.get_plugin_registry().acquire_plugin_resource( + "cuda_ctx", CudaCtxManager(self.cuDevice) + ) + self.all_pads_d = checkCudaErrors( + cuda.cuMemAlloc(np.int32().itemsize * self.N * 2) + ) + self.orig_dims_d = checkCudaErrors( + 
cuda.cuMemAlloc(np.int32().itemsize * self.N) + ) self.Y_shape_d = checkCudaErrors(cuda.cuMemAlloc(np.int32().itemsize * self.N)) - + def get_output_datatype(self, index, input_types): return input_types[0] @@ -157,11 +166,17 @@ def configure_plugin(self, inp, out): # Copy vectors from host memory to device memory if self.all_pads_d: - checkCudaErrors(cuda.cuMemcpyHtoD(self.all_pads_d, all_pads, all_pads.nbytes)) + checkCudaErrors( + cuda.cuMemcpyHtoD(self.all_pads_d, all_pads, all_pads.nbytes) + ) if self.orig_dims_d: - checkCudaErrors(cuda.cuMemcpyHtoD(self.orig_dims_d, orig_dims, orig_dims.nbytes)) + checkCudaErrors( + cuda.cuMemcpyHtoD(self.orig_dims_d, orig_dims, orig_dims.nbytes) + ) if self.Y_shape_d: - checkCudaErrors(cuda.cuMemcpyHtoD(self.Y_shape_d, out_dims, out_dims.nbytes)) + checkCudaErrors( + cuda.cuMemcpyHtoD(self.Y_shape_d, out_dims, out_dims.nbytes) + ) self.Y_len_d = np.prod(out_dims) @@ -205,25 +220,43 @@ def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream): if inp_dtype == np.float32: kernelHelper = KernelHelper(circ_pad_float_kernel, int(self.cuDevice)) - _circ_pad_float_kernel = kernelHelper.getFunction(b'circ_pad_float') - checkCudaErrors(cuda.cuLaunchKernel(_circ_pad_float_kernel, - numBlocks, 1, 1, - blockSize, 1, 1, - 0, - stream_ptr, - kernelArgs, 0)) + _circ_pad_float_kernel = kernelHelper.getFunction(b"circ_pad_float") + checkCudaErrors( + cuda.cuLaunchKernel( + _circ_pad_float_kernel, + numBlocks, + 1, + 1, + blockSize, + 1, + 1, + 0, + stream_ptr, + kernelArgs, + 0, + ) + ) elif inp_dtype == np.float16: kernelHelper = KernelHelper(circ_pad_half_kernel, int(self.cuDevice)) - _circ_pad_half_kernel = kernelHelper.getFunction(b'circ_pad_half') - checkCudaErrors(cuda.cuLaunchKernel(_circ_pad_half_kernel, - numBlocks, 1, 1, - blockSize, 1, 1, - 0, - stream_ptr, - kernelArgs, 0)) + _circ_pad_half_kernel = kernelHelper.getFunction(b"circ_pad_half") + checkCudaErrors( + cuda.cuLaunchKernel( + _circ_pad_half_kernel, + 
numBlocks, + 1, + 1, + blockSize, + 1, + 1, + 0, + stream_ptr, + kernelArgs, + 0, + ) + ) else: raise ValueError("inp_dtype not valid") - + def clone(self): cloned_plugin = CircPadPlugin() cloned_plugin.__dict__.update(self.__dict__) @@ -239,7 +272,7 @@ def terminate(self): trt.get_plugin_registry().release_plugin_resource("cuda_ctx") - # + # # The following defaults take effect since the respective methods are not overriden # @@ -248,7 +281,7 @@ def terminate(self): # def get_workspace_size(self, input_desc, output_desc): # return 0 - + # def destroy(self): # pass @@ -259,10 +292,12 @@ def __init__(self): self.name = "CircPadPlugin" self.plugin_namespace = "" self.plugin_version = "1" - self.field_names = trt.PluginFieldCollection([ - trt.PluginField("pads", np.array([]), trt.PluginFieldType.INT32), - trt.PluginField("N", np.array([]), trt.PluginFieldType.INT32) - ]) + self.field_names = trt.PluginFieldCollection( + [ + trt.PluginField("pads", np.array([]), trt.PluginFieldType.INT32), + trt.PluginField("N", np.array([]), trt.PluginFieldType.INT32), + ] + ) def create_plugin(self, name, fc): return CircPadPlugin(fc) @@ -273,12 +308,13 @@ def deserialize_plugin(self, name, data): deserialized.__dict__.update(j) return deserialized + if __name__ == "__main__": args = parseArgs() # Initialize CUDA Driver API - err, = cuda.cuInit(0) + (err,) = cuda.cuInit(0) # Retrieve handle for device 0 err, cuDevice = cuda.cuDeviceGet(0) @@ -319,12 +355,12 @@ def deserialize_plugin(self, name, data): # build engine build_engine = EngineFromNetwork( - NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision==np.float16) + NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision == np.float16) ) Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap") # Run - with TrtRunner(build_engine, "trt_runner")as runner: + with TrtRunner(build_engine, "trt_runner") as runner: outputs = runner.infer({"X": X}) Y = outputs["Y"] diff --git 
a/samples/python/python_plugin/circ_pad_plugin_cupy.py b/samples/python/python_plugin/circ_pad_plugin_cupy.py index 82b271cc..19545a11 100644 --- a/samples/python/python_plugin/circ_pad_plugin_cupy.py +++ b/samples/python/python_plugin/circ_pad_plugin_cupy.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,14 +27,15 @@ CreateConfig, EngineFromNetwork, NetworkFromOnnxPath, - TrtRunner + TrtRunner, ) from polygraphy.json import to_json, from_json from utils import volume, parseArgs -circ_pad_half_kernel = cp.RawKernel(r''' +circ_pad_half_kernel = cp.RawKernel( + r""" #include extern "C" __global__ void circ_pad_half(half const* X, int const* all_pads, int const* orig_dims, half* Y, int const* Y_shape, int const* Y_len) { @@ -61,9 +62,12 @@ ]; } } -''', 'circ_pad_half') +""", + "circ_pad_half", +) -circ_pad_float_kernel = cp.RawKernel(r''' +circ_pad_float_kernel = cp.RawKernel( + r""" extern "C" __global__ void circ_pad_float(float const* X, int const* all_pads, int const* orig_dims, float* Y, int const* Y_shape, int const* Y_len) { int index = blockIdx.x * blockDim.x + threadIdx.x; @@ -89,14 +93,17 @@ ]; } } -''', 'circ_pad_float') +""", + "circ_pad_float", +) + class CircPadPlugin(trt.IPluginV2DynamicExt): def __init__(self, fc=None): trt.IPluginV2DynamicExt.__init__(self) self.pads = [] self.X_shape = [] - + self.num_outputs = 1 self.plugin_namespace = "" self.plugin_type = "CircPadPlugin" @@ -190,9 +197,31 @@ def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream): with cuda_stream: if inp_dtype == np.float32: - circ_pad_float_kernel((numBlocks,), (blockSize,), (a, self.all_pads_d, self.orig_dims_d, c, self.Y_shape_d, self.Y_len_d)) + 
circ_pad_float_kernel( + (numBlocks,), + (blockSize,), + ( + a, + self.all_pads_d, + self.orig_dims_d, + c, + self.Y_shape_d, + self.Y_len_d, + ), + ) elif inp_dtype == np.float16: - circ_pad_half_kernel((numBlocks,), (blockSize,), (a, self.all_pads_d, self.orig_dims_d, c, self.Y_shape_d, self.Y_len_d)) + circ_pad_half_kernel( + (numBlocks,), + (blockSize,), + ( + a, + self.all_pads_d, + self.orig_dims_d, + c, + self.Y_shape_d, + self.Y_len_d, + ), + ) else: raise ValueError("inp_dtype not valid") @@ -201,7 +230,7 @@ def clone(self): cloned_plugin.__dict__.update(self.__dict__) return cloned_plugin - # + # # The following defaults take effect since the respective methods are not overriden # @@ -213,17 +242,18 @@ def clone(self): # def get_workspace_size(self, input_desc, output_desc): # return 0 - + # def destroy(self): # pass # def terminate(self): # pass + class CircPadPluginCreator(trt.IPluginCreator): def __init__(self): trt.IPluginCreator.__init__(self) - + self.name = "CircPadPlugin" self.plugin_namespace = "" self.plugin_version = "1" @@ -233,13 +263,14 @@ def __init__(self): def create_plugin(self, name, fc): return CircPadPlugin(fc) - + def deserialize_plugin(self, name, data): j = dict(from_json(data.decode("utf-8"))) deserialized = CircPadPlugin() deserialized.__dict__.update(j) return deserialized + if __name__ == "__main__": args = parseArgs() @@ -275,12 +306,12 @@ def deserialize_plugin(self, name, data): # build engine build_engine = EngineFromNetwork( - NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision==np.float16) + NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision == np.float16) ) Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap") # Run - with TrtRunner(build_engine, "trt_runner")as runner: + with TrtRunner(build_engine, "trt_runner") as runner: outputs = runner.infer({"X": X}) Y = outputs["Y"] diff --git a/samples/python/python_plugin/circ_pad_plugin_inetdef_cuda_python.py 
b/samples/python/python_plugin/circ_pad_plugin_inetdef_cuda_python.py index 60208ab3..6abf526f 100644 --- a/samples/python/python_plugin/circ_pad_plugin_inetdef_cuda_python.py +++ b/samples/python/python_plugin/circ_pad_plugin_inetdef_cuda_python.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,7 +23,7 @@ CreateConfig, TrtRunner, create_network, - engine_from_network + engine_from_network, ) from polygraphy.json import to_json, from_json @@ -31,7 +31,7 @@ from utils import checkCudaErrors, KernelHelper, parseArgs, CudaCtxManager from cuda import cuda -circ_pad_half_kernel = r''' +circ_pad_half_kernel = r""" #include extern "C" __global__ void circ_pad_half(half const* X, int const* all_pads, int const* orig_dims, half* Y, int const* Y_shape, int Y_len) { @@ -58,9 +58,9 @@ ]; } } -''' +""" -circ_pad_float_kernel = r''' +circ_pad_float_kernel = r""" extern "C" __global__ void circ_pad_float(float const* X, int const* all_pads, int const* orig_dims, float* Y, int const* Y_shape, int Y_len) { int index = blockIdx.x * blockDim.x + threadIdx.x; @@ -86,7 +86,8 @@ ]; } } -''' +""" + class CircPadPlugin(trt.IPluginV2DynamicExt): def __init__(self, fc=None): @@ -107,7 +108,9 @@ def __init__(self, fc=None): self.cuDevice = None if fc is not None: - assert set([f.name for f in fc]) == set(["pads", "N"]), "Field collection invalid" + assert set([f.name for f in fc]) == set( + ["pads", "N"] + ), "Field collection invalid" for f in fc: if f.name == "pads": self.pads = f.data @@ -116,11 +119,17 @@ def __init__(self, fc=None): def initialize(self): err, self.cuDevice = cuda.cuDeviceGet(0) - trt.get_plugin_registry().acquire_plugin_resource("cuda_ctx", CudaCtxManager(self.cuDevice)) - 
self.all_pads_d = checkCudaErrors(cuda.cuMemAlloc(np.int32().itemsize * self.N * 2)) - self.orig_dims_d = checkCudaErrors(cuda.cuMemAlloc(np.int32().itemsize * self.N)) + trt.get_plugin_registry().acquire_plugin_resource( + "cuda_ctx", CudaCtxManager(self.cuDevice) + ) + self.all_pads_d = checkCudaErrors( + cuda.cuMemAlloc(np.int32().itemsize * self.N * 2) + ) + self.orig_dims_d = checkCudaErrors( + cuda.cuMemAlloc(np.int32().itemsize * self.N) + ) self.Y_shape_d = checkCudaErrors(cuda.cuMemAlloc(np.int32().itemsize * self.N)) - + def get_output_datatype(self, index, input_types): return input_types[0] @@ -157,11 +166,17 @@ def configure_plugin(self, inp, out): # Copy vectors from host memory to device memory if self.all_pads_d: - checkCudaErrors(cuda.cuMemcpyHtoD(self.all_pads_d, all_pads, all_pads.nbytes)) + checkCudaErrors( + cuda.cuMemcpyHtoD(self.all_pads_d, all_pads, all_pads.nbytes) + ) if self.orig_dims_d: - checkCudaErrors(cuda.cuMemcpyHtoD(self.orig_dims_d, orig_dims, orig_dims.nbytes)) + checkCudaErrors( + cuda.cuMemcpyHtoD(self.orig_dims_d, orig_dims, orig_dims.nbytes) + ) if self.Y_shape_d: - checkCudaErrors(cuda.cuMemcpyHtoD(self.Y_shape_d, out_dims, out_dims.nbytes)) + checkCudaErrors( + cuda.cuMemcpyHtoD(self.Y_shape_d, out_dims, out_dims.nbytes) + ) self.Y_len_d = np.prod(out_dims) @@ -205,25 +220,43 @@ def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream): if inp_dtype == np.float32: kernelHelper = KernelHelper(circ_pad_float_kernel, int(self.cuDevice)) - _circ_pad_float_kernel = kernelHelper.getFunction(b'circ_pad_float') - checkCudaErrors(cuda.cuLaunchKernel(_circ_pad_float_kernel, - numBlocks, 1, 1, - blockSize, 1, 1, - 0, - stream_ptr, - kernelArgs, 0)) + _circ_pad_float_kernel = kernelHelper.getFunction(b"circ_pad_float") + checkCudaErrors( + cuda.cuLaunchKernel( + _circ_pad_float_kernel, + numBlocks, + 1, + 1, + blockSize, + 1, + 1, + 0, + stream_ptr, + kernelArgs, + 0, + ) + ) elif inp_dtype == np.float16: 
kernelHelper = KernelHelper(circ_pad_half_kernel, int(self.cuDevice)) - _circ_pad_half_kernel = kernelHelper.getFunction(b'circ_pad_half') - checkCudaErrors(cuda.cuLaunchKernel(_circ_pad_half_kernel, - numBlocks, 1, 1, - blockSize, 1, 1, - 0, - stream_ptr, - kernelArgs, 0)) + _circ_pad_half_kernel = kernelHelper.getFunction(b"circ_pad_half") + checkCudaErrors( + cuda.cuLaunchKernel( + _circ_pad_half_kernel, + numBlocks, + 1, + 1, + blockSize, + 1, + 1, + 0, + stream_ptr, + kernelArgs, + 0, + ) + ) else: raise ValueError("inp_dtype not valid") - + def clone(self): cloned_plugin = CircPadPlugin() cloned_plugin.__dict__.update(self.__dict__) @@ -239,7 +272,7 @@ def terminate(self): plg_registry.release_plugin_resource("cuda_ctx") - # + # # The following defaults take effect since the respective methods are not overriden # @@ -248,7 +281,7 @@ def terminate(self): # def get_workspace_size(self, input_desc, output_desc): # return 0 - + # def destroy(self): # pass @@ -259,10 +292,12 @@ def __init__(self): self.name = "CircPadPlugin" self.plugin_namespace = "" self.plugin_version = "1" - self.field_names = trt.PluginFieldCollection([ - trt.PluginField("pads", np.array([]), trt.PluginFieldType.INT32), - trt.PluginField("N", np.array([]), trt.PluginFieldType.INT32) - ]) + self.field_names = trt.PluginFieldCollection( + [ + trt.PluginField("pads", np.array([]), trt.PluginFieldType.INT32), + trt.PluginField("N", np.array([]), trt.PluginFieldType.INT32), + ] + ) def create_plugin(self, name, fc): return CircPadPlugin(fc) @@ -273,13 +308,14 @@ def deserialize_plugin(self, name, data): deserialized.__dict__.update(j) return deserialized + if __name__ == "__main__": args = parseArgs() precision = np.float32 if args.precision == "fp32" else np.float16 # Initialize CUDA Driver API - err, = cuda.cuInit(0) + (err,) = cuda.cuInit(0) # Retrieve handle for device 0 err, cuDevice = cuda.cuDeviceGet(0) @@ -306,28 +342,36 @@ def deserialize_plugin(self, name, data): builder, network = 
create_network() plg_creator = plg_registry.get_plugin_creator("CircPadPlugin", "1", "") plugin_fields_list = [ - trt.PluginField("pads", np.array(pads, dtype=np.int32), trt.PluginFieldType.INT32), + trt.PluginField( + "pads", np.array(pads, dtype=np.int32), trt.PluginFieldType.INT32 + ), trt.PluginField("N", np.array([4], dtype=np.int32), trt.PluginFieldType.INT32), ] pfc = trt.PluginFieldCollection(plugin_fields_list) plugin = plg_creator.create_plugin("CircPadPlugin", pfc) # Populate network - input_X = network.add_input(name="X", dtype=trt.float32 if precision==np.float32 else trt.float16, shape=X.shape) + input_X = network.add_input( + name="X", + dtype=trt.float32 if precision == np.float32 else trt.float16, + shape=X.shape, + ) out = network.add_plugin_v2([input_X], plugin) out.get_output(0).name = "Y" network.mark_output(tensor=out.get_output(0)) # Build engine config = builder.create_builder_config() - engine = engine_from_network((builder, network), CreateConfig(fp16=precision==trt.float16)) + engine = engine_from_network( + (builder, network), CreateConfig(fp16=precision == trt.float16) + ) Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap") # Run - with TrtRunner(engine, "trt_runner")as runner: + with TrtRunner(engine, "trt_runner") as runner: outputs = runner.infer({"X": X}) Y = outputs["Y"] - + if np.allclose(Y, Y_ref): print("Inference result correct!") else: diff --git a/samples/python/python_plugin/circ_pad_plugin_numba.py b/samples/python/python_plugin/circ_pad_plugin_numba.py index 2cc0bfab..d568419d 100644 --- a/samples/python/python_plugin/circ_pad_plugin_numba.py +++ b/samples/python/python_plugin/circ_pad_plugin_numba.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -32,6 +32,7 @@ from polygraphy.json import to_json, from_json from utils import volume, parseArgs + @cuda.jit def circ_pad(X, all_pads, orig_dims, Y, Y_shape, Y_len): index = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x @@ -57,6 +58,7 @@ def circ_pad(X, all_pads, orig_dims, Y, Y_shape, Y_len): ) ] + class CircPadPlugin(trt.IPluginV2DynamicExt): def __init__(self, fc=None): trt.IPluginV2DynamicExt.__init__(self) @@ -76,7 +78,7 @@ def get_output_datatype(self, index, input_types): return input_types[0] def get_output_dimensions(self, output_index, inputs, exprBuilder): - + output_dims = trt.DimsExprs(inputs[0]) for i in range(np.size(self.pads) // 2): @@ -163,8 +165,8 @@ def clone(self): cloned_plugin = CircPadPlugin() cloned_plugin.__dict__.update(self.__dict__) return cloned_plugin - - # + + # # The following defaults take effect since the respective methods are not overriden # @@ -176,7 +178,7 @@ def clone(self): # def get_workspace_size(self, input_desc, output_desc): # return 0 - + # def destroy(self): # pass @@ -203,6 +205,7 @@ def deserialize_plugin(self, name, data): deserialized.__dict__.update(j) return deserialized + if __name__ == "__main__": args = parseArgs() @@ -234,12 +237,12 @@ def deserialize_plugin(self, name, data): # build engine build_engine = EngineFromNetwork( - NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision==np.float16) + NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision == np.float16) ) Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap") # Run - with TrtRunner(build_engine, "trt_runner")as runner: + with TrtRunner(build_engine, "trt_runner") as runner: outputs = runner.infer({"X": X}) Y = outputs["Y"] diff --git a/samples/python/python_plugin/circ_pad_plugin_torch.py b/samples/python/python_plugin/circ_pad_plugin_torch.py index 8b036469..76e8cc41 100644 --- 
a/samples/python/python_plugin/circ_pad_plugin_torch.py +++ b/samples/python/python_plugin/circ_pad_plugin_torch.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -33,12 +33,13 @@ from utils import volume, parseArgs + class CircPadPlugin(trt.IPluginV2DynamicExt): def __init__(self, fc=None): trt.IPluginV2DynamicExt.__init__(self) self.pads = [] self.X_shape = [] - + self.num_outputs = 1 self.plugin_namespace = "" self.plugin_type = "CircPadPlugin" @@ -110,10 +111,10 @@ def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream): a_d = cp.ndarray(tuple(input_desc[0].dims), dtype=inp_dtype, memptr=a_ptr) c_d = cp.ndarray((volume(output_desc[0].dims)), dtype=inp_dtype, memptr=c_ptr) - a_t = torch.as_tensor(a_d, device='cuda') + a_t = torch.as_tensor(a_d, device="cuda") # Use PyTorch functional op - no need to write kernel - out = torch.nn.functional.pad(a_t, self.pads.tolist(), mode='circular') + out = torch.nn.functional.pad(a_t, self.pads.tolist(), mode="circular") cp.copyto(c_d, cp.reshape(cp.asarray(out), (-1,))) return 0 @@ -123,7 +124,7 @@ def clone(self): cloned_plugin.__dict__.update(self.__dict__) return cloned_plugin - # + # # The following defaults take effect since the respective methods are not overriden # @@ -135,7 +136,7 @@ def clone(self): # def get_workspace_size(self, input_desc, output_desc): # return 0 - + # def destroy(self): # pass @@ -162,6 +163,7 @@ def deserialize_plugin(self, name, data): deserialized.__dict__.update(j) return deserialized + if __name__ == "__main__": args = parseArgs() @@ -193,12 +195,12 @@ def deserialize_plugin(self, name, data): # build engine build_engine = EngineFromNetwork( - NetworkFromOnnxPath(onnx_path), 
CreateConfig(fp16=precision==np.float16) + NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision == np.float16) ) Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap") # Run - with TrtRunner(build_engine, "trt_runner")as runner: + with TrtRunner(build_engine, "trt_runner") as runner: outputs = runner.infer({"X": X}) Y = outputs["Y"] diff --git a/samples/python/python_plugin/circ_pad_plugin_triton.py b/samples/python/python_plugin/circ_pad_plugin_triton.py index 93b5f0fd..686d4e5c 100644 --- a/samples/python/python_plugin/circ_pad_plugin_triton.py +++ b/samples/python/python_plugin/circ_pad_plugin_triton.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -36,13 +36,26 @@ from utils import volume, parseArgs + @triton.jit -def circ_pad(X, - all_pads_0, all_pads_2, all_pads_4, all_pads_6, - orig_dims_0, orig_dims_1, orig_dims_2, orig_dims_3, - Y, - Y_shape_1, Y_shape_2, Y_shape_3, - X_len, Y_len, BLOCK_SIZE: tl.constexpr,): +def circ_pad( + X, + all_pads_0, + all_pads_2, + all_pads_4, + all_pads_6, + orig_dims_0, + orig_dims_1, + orig_dims_2, + orig_dims_3, + Y, + Y_shape_1, + Y_shape_2, + Y_shape_3, + X_len, + Y_len, + BLOCK_SIZE: tl.constexpr, +): pid = tl.program_id(0) i = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE) @@ -58,7 +71,12 @@ def circ_pad(X, j2 = (i2 - all_pads_4 + orig_dims_2) % orig_dims_2 j3 = (i3 - all_pads_6 + orig_dims_3) % orig_dims_3 - load_idx = orig_dims_3 * orig_dims_2 * orig_dims_1 * j0 + orig_dims_3 * orig_dims_2 * j1 + orig_dims_3 * j2 + j3 + load_idx = ( + orig_dims_3 * orig_dims_2 * orig_dims_1 * j0 + + orig_dims_3 * orig_dims_2 * j1 + + orig_dims_3 * j2 + + j3 + ) mask_x = load_idx < X_len x = tl.load(X + load_idx, 
mask=mask_x) @@ -143,8 +161,8 @@ def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream): a_d = cp.ndarray((volume(input_desc[0].dims)), dtype=inp_dtype, memptr=a_ptr) c_d = cp.ndarray((volume(output_desc[0].dims)), dtype=inp_dtype, memptr=c_ptr) - a_t = torch.as_tensor(a_d, device='cuda') - c_t = torch.as_tensor(c_d, device='cuda') + a_t = torch.as_tensor(a_d, device="cuda") + c_t = torch.as_tensor(c_d, device="cuda") N = len(self.X_shape) all_pads = np.zeros((N * 2,), dtype=np.int32) @@ -163,12 +181,23 @@ def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream): blockSize = 256 numBlocks = (int((np.prod(out_dims) + blockSize - 1) // blockSize),) - circ_pad[numBlocks](a_t, - all_pads[0], all_pads[2], all_pads[4], all_pads[6], - orig_dims[0], orig_dims[1], orig_dims[2], orig_dims[3], + circ_pad[numBlocks]( + a_t, + all_pads[0], + all_pads[2], + all_pads[4], + all_pads[6], + orig_dims[0], + orig_dims[1], + orig_dims[2], + orig_dims[3], c_t, - out_dims[1], out_dims[2], out_dims[3], - int(np.prod(orig_dims)), int(np.prod(out_dims)), BLOCK_SIZE=256 + out_dims[1], + out_dims[2], + out_dims[3], + int(np.prod(orig_dims)), + int(np.prod(out_dims)), + BLOCK_SIZE=256, ) return 0 @@ -178,7 +207,7 @@ def clone(self): cloned_plugin.__dict__.update(self.__dict__) return cloned_plugin - # + # # The following defaults take effect since the respective methods are not overriden # @@ -190,7 +219,7 @@ def clone(self): # def get_workspace_size(self, input_desc, output_desc): # return 0 - + # def destroy(self): # pass @@ -217,6 +246,7 @@ def deserialize_plugin(self, name, data): deserialized.__dict__.update(j) return deserialized + if __name__ == "__main__": args = parseArgs() @@ -248,12 +278,12 @@ def deserialize_plugin(self, name, data): # build engine build_engine = EngineFromNetwork( - NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision==np.float16) + NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision == np.float16) ) 
Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap") # Run - with TrtRunner(build_engine, "trt_runner")as runner: + with TrtRunner(build_engine, "trt_runner") as runner: outputs = runner.infer({"X": X}) Y = outputs["Y"] diff --git a/samples/python/python_plugin/circ_plugin_cpp/circ_pad_plugin.cu b/samples/python/python_plugin/circ_plugin_cpp/circ_pad_plugin.cu index 8e06a025..0bcffd56 100644 --- a/samples/python/python_plugin/circ_plugin_cpp/circ_pad_plugin.cu +++ b/samples/python/python_plugin/circ_plugin_cpp/circ_pad_plugin.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -109,8 +109,7 @@ __global__ void circPadKernel( int32_t j2 = (i2 - allPads[4] + origDims[2]) % origDims[2]; int32_t j3 = (i3 - allPads[6] + origDims[3]) % origDims[3]; - y[i] = x[origDims[3] * origDims[2] * origDims[1] * j0 + origDims[3] * origDims[2] * j1 + origDims[3] * j2 - + j3]; + y[i] = x[origDims[3] * origDims[2] * origDims[1] * j0 + origDims[3] * origDims[2] * j1 + origDims[3] * j2 + j3]; } } diff --git a/samples/python/python_plugin/utils.py b/samples/python/python_plugin/utils.py index 4015b72c..1a1aa16c 100644 --- a/samples/python/python_plugin/utils.py +++ b/samples/python/python_plugin/utils.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,15 +23,26 @@ import tensorrt as trt + def parseArgs(): - parser = argparse.ArgumentParser(description="Options for Circular Padding plugin C++ example") - parser.add_argument('--precision', type=str, default="fp32", choices=["fp32", "fp16"], help="Precision to use for plugin") + parser = argparse.ArgumentParser( + description="Options for Circular Padding plugin C++ example" + ) + parser.add_argument( + "--precision", + type=str, + default="fp32", + choices=["fp32", "fp16"], + help="Precision to use for plugin", + ) return parser.parse_args() + def volume(d): return np.prod(d) + # Taken from https://github.com/NVIDIA/cuda-python/blob/main/examples/common/helper_cuda.py def checkCudaErrors(result): def _cudaGetErrorEnum(error): @@ -43,9 +54,14 @@ def _cudaGetErrorEnum(error): elif isinstance(error, nvrtc.nvrtcResult): return nvrtc.nvrtcGetErrorString(error)[1] else: - raise RuntimeError('Unknown error type: {}'.format(error)) + raise RuntimeError("Unknown error type: {}".format(error)) + if result[0].value: - raise RuntimeError("CUDA error code={}({})".format(result[0].value, _cudaGetErrorEnum(result[0]))) + raise RuntimeError( + "CUDA error code={}({})".format( + result[0].value, _cudaGetErrorEnum(result[0]) + ) + ) if len(result) == 1: return None elif len(result) == 2: @@ -53,34 +69,50 @@ def _cudaGetErrorEnum(error): else: return result[1:] + # Taken from https://github.com/NVIDIA/cuda-python/blob/main/examples/common/common.py class KernelHelper: def __init__(self, code, devID): - prog = checkCudaErrors(nvrtc.nvrtcCreateProgram(str.encode(code), b'sourceCode.cu', 0, [], [])) - CUDA_HOME = os.getenv('CUDA_HOME') + prog = checkCudaErrors( + nvrtc.nvrtcCreateProgram(str.encode(code), b"sourceCode.cu", 0, [], []) + ) + CUDA_HOME = os.getenv("CUDA_HOME") if CUDA_HOME == None: - CUDA_HOME = os.getenv('CUDA_PATH') + CUDA_HOME = os.getenv("CUDA_PATH") if 
CUDA_HOME == None: - raise RuntimeError('Environment variable CUDA_HOME or CUDA_PATH is not set') - include_dirs = os.path.join(CUDA_HOME, 'include') + raise RuntimeError("Environment variable CUDA_HOME or CUDA_PATH is not set") + include_dirs = os.path.join(CUDA_HOME, "include") # Initialize CUDA checkCudaErrors(cudart.cudaFree(0)) - major = checkCudaErrors(cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, devID)) - minor = checkCudaErrors(cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, devID)) + major = checkCudaErrors( + cudart.cudaDeviceGetAttribute( + cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, devID + ) + ) + minor = checkCudaErrors( + cudart.cudaDeviceGetAttribute( + cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, devID + ) + ) _, nvrtc_minor = checkCudaErrors(nvrtc.nvrtcVersion()) - use_cubin = (nvrtc_minor >= 1) - prefix = 'sm' if use_cubin else 'compute' - arch_arg = bytes(f'--gpu-architecture={prefix}_{major}{minor}', 'ascii') + use_cubin = nvrtc_minor >= 1 + prefix = "sm" if use_cubin else "compute" + arch_arg = bytes(f"--gpu-architecture={prefix}_{major}{minor}", "ascii") try: - opts = [b'--fmad=true', arch_arg, '--include-path={}'.format(include_dirs).encode('UTF-8'), - b'--std=c++11', b'-default-device'] + opts = [ + b"--fmad=true", + arch_arg, + "--include-path={}".format(include_dirs).encode("UTF-8"), + b"--std=c++11", + b"-default-device", + ] checkCudaErrors(nvrtc.nvrtcCompileProgram(prog, len(opts), opts)) except RuntimeError as err: logSize = checkCudaErrors(nvrtc.nvrtcGetProgramLogSize(prog)) - log = b' ' * logSize + log = b" " * logSize checkCudaErrors(nvrtc.nvrtcGetProgramLog(prog, log)) print(log.decode()) print(err) @@ -88,11 +120,11 @@ def __init__(self, code, devID): if use_cubin: dataSize = checkCudaErrors(nvrtc.nvrtcGetCUBINSize(prog)) - data = b' ' * dataSize + data = b" " * dataSize checkCudaErrors(nvrtc.nvrtcGetCUBIN(prog, data)) else: 
dataSize = checkCudaErrors(nvrtc.nvrtcGetPTXSize(prog)) - data = b' ' * dataSize + data = b" " * dataSize checkCudaErrors(nvrtc.nvrtcGetPTX(prog, data)) self.module = checkCudaErrors(cuda.cuModuleLoadData(np.char.array(data))) @@ -100,8 +132,9 @@ def __init__(self, code, devID): def getFunction(self, name): return checkCudaErrors(cuda.cuModuleGetFunction(self.module, name)) + class CudaCtxManager(trt.IPluginResource): - def __init__(self, device = None): + def __init__(self, device=None): trt.IPluginResource.__init__(self) self.device = device self.cuda_ctx = None diff --git a/samples/python/scripts/download_mnist_data.sh b/samples/python/scripts/download_mnist_data.sh index 809bcbc9..196ddd4e 100755 --- a/samples/python/scripts/download_mnist_data.sh +++ b/samples/python/scripts/download_mnist_data.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/python/scripts/download_mnist_pgms.py b/samples/python/scripts/download_mnist_pgms.py index a1ee0cba..dee877fe 100644 --- a/samples/python/scripts/download_mnist_pgms.py +++ b/samples/python/scripts/download_mnist_pgms.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/python/simple_progress_monitor/simple_progress_monitor.py b/samples/python/simple_progress_monitor/simple_progress_monitor.py index 9ed6c6ba..fe54f720 100644 --- a/samples/python/simple_progress_monitor/simple_progress_monitor.py +++ b/samples/python/simple_progress_monitor/simple_progress_monitor.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -36,6 +36,7 @@ class ModelData(object): # We can convert TensorRT data types to numpy types with trt.nptype(). DTYPE = trt.float32 + # This is a simple ASCII-art progress monitor comparable to the C++ version in sample_progress_monitor. class SimpleProgressMonitor(trt.IProgressMonitor): def __init__(self): @@ -46,10 +47,15 @@ def __init__(self): def phase_start(self, phase_name, parent_phase, num_steps): try: if parent_phase is not None: - nbIndents = 1 + self._active_phases[parent_phase]['nbIndents'] + nbIndents = 1 + self._active_phases[parent_phase]["nbIndents"] else: nbIndents = 0 - self._active_phases[phase_name] = { 'title': phase_name, 'steps': 0, 'num_steps': num_steps, 'nbIndents': nbIndents } + self._active_phases[phase_name] = { + "title": phase_name, + "steps": 0, + "num_steps": num_steps, + "nbIndents": nbIndents, + } self._redraw() except KeyboardInterrupt: # The phase_start callback cannot directly cancel the build, so request the cancellation from within step_complete. @@ -58,13 +64,13 @@ def phase_start(self, phase_name, parent_phase, num_steps): def phase_finish(self, phase_name): try: del self._active_phases[phase_name] - self._redraw(blank_lines=1) # Clear the removed phase. 
+ self._redraw(blank_lines=1) # Clear the removed phase. except KeyboardInterrupt: _step_result = False def step_complete(self, phase_name, step): try: - self._active_phases[phase_name]['steps'] = step + self._active_phases[phase_name]["steps"] = step self._redraw() return self._step_result except KeyboardInterrupt: @@ -75,32 +81,35 @@ def _redraw(self, *, blank_lines=0): # The Python curses module is not widely available on Windows platforms. # Instead, this function uses raw terminal escape sequences. See the sample documentation for references. def clear_line(): - print('\x1B[2K', end='') + print("\x1B[2K", end="") + def move_to_start_of_line(): - print('\x1B[0G', end='') + print("\x1B[0G", end="") + def move_cursor_up(lines): - print('\x1B[{}A'.format(lines), end='') + print("\x1B[{}A".format(lines), end="") def progress_bar(steps, num_steps): INNER_WIDTH = 10 completed_bar_chars = int(INNER_WIDTH * steps / float(num_steps)) - return '[{}{}]'.format( - '=' * completed_bar_chars, - '-' * (INNER_WIDTH - completed_bar_chars)) + return "[{}{}]".format( + "=" * completed_bar_chars, "-" * (INNER_WIDTH - completed_bar_chars) + ) # Set max_cols to a default of 200 if not run in interactive mode. max_cols = os.get_terminal_size().columns if sys.stdout.isatty() else 200 move_to_start_of_line() for phase in self._active_phases.values(): - phase_prefix = '{indent}{bar} {title}'.format( - indent = ' ' * phase['nbIndents'], - bar = progress_bar(phase['steps'], phase['num_steps']), - title = phase['title']) - phase_suffix = '{steps}/{num_steps}'.format(**phase) + phase_prefix = "{indent}{bar} {title}".format( + indent=" " * phase["nbIndents"], + bar=progress_bar(phase["steps"], phase["num_steps"]), + title=phase["title"], + ) + phase_suffix = "{steps}/{num_steps}".format(**phase) allowable_prefix_chars = max_cols - len(phase_suffix) - 2 if allowable_prefix_chars < len(phase_prefix): - phase_prefix = phase_prefix[0:allowable_prefix_chars-3] + '...' 
+ phase_prefix = phase_prefix[0 : allowable_prefix_chars - 3] + "..." clear_line() print(phase_prefix, phase_suffix) for line in range(blank_lines): @@ -109,16 +118,20 @@ def progress_bar(steps, num_steps): move_cursor_up(len(self._active_phases) + blank_lines) sys.stdout.flush() + # You can set the logger severity higher to suppress messages (or lower to display more messages). TRT_LOGGER = trt.Logger(trt.Logger.WARNING) + # The Onnx path is used for Onnx models. def build_engine_onnx(model_file): builder = trt.Builder(TRT_LOGGER) network = builder.create_network(0) config = builder.create_builder_config() if not sys.stdout.isatty(): - print("Warning: This sample should be run from an interactive terminal in order to showcase the progress monitor correctly.") + print( + "Warning: This sample should be run from an interactive terminal in order to showcase the progress monitor correctly." + ) config.progress_monitor = SimpleProgressMonitor() parser = trt.OnnxParser(network, TRT_LOGGER) @@ -186,7 +199,14 @@ def main(): test_case = load_normalized_test_case(test_image, inputs[0].host) # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the # probability that the image corresponds to that label - trt_outputs = common.do_inference(context, engine=engine, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) + trt_outputs = common.do_inference( + context, + engine=engine, + bindings=bindings, + inputs=inputs, + outputs=outputs, + stream=stream, + ) # We use the highest probability as our prediction. Its index corresponds to the predicted label. 
pred = labels[np.argmax(trt_outputs[0])] common.free_buffers(inputs, outputs, stream) @@ -195,5 +215,6 @@ def main(): else: print("Incorrectly recognized " + test_case + " as " + pred) + if __name__ == "__main__": main() diff --git a/samples/python/tensorflow_object_detection_api/build_engine.py b/samples/python/tensorflow_object_detection_api/build_engine.py index 0a0d6238..9bbf5f7c 100644 --- a/samples/python/tensorflow_object_detection_api/build_engine.py +++ b/samples/python/tensorflow_object_detection_api/build_engine.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -56,7 +56,10 @@ def set_image_batcher(self, image_batcher: ImageBatcher): :param image_batcher: The ImageBatcher object """ self.image_batcher = image_batcher - size = int(np.dtype(self.image_batcher.dtype).itemsize * np.prod(self.image_batcher.shape)) + size = int( + np.dtype(self.image_batcher.dtype).itemsize + * np.prod(self.image_batcher.shape) + ) self.batch_allocation = common.cuda_call(cudart.cudaMalloc(size)) self.batch_generator = self.image_batcher.get_batch() @@ -81,8 +84,14 @@ def get_batch(self, names): return None try: batch, _, _ = next(self.batch_generator) - log.info("Calibrating image {} / {}".format(self.image_batcher.image_index, self.image_batcher.num_images)) - common.memcpy_host_to_device(self.batch_allocation, np.ascontiguousarray(batch)) + log.info( + "Calibrating image {} / {}".format( + self.image_batcher.image_index, self.image_batcher.num_images + ) + ) + common.memcpy_host_to_device( + self.batch_allocation, np.ascontiguousarray(batch) + ) return [int(self.batch_allocation)] except StopIteration: log.info("Finished calibration batches") @@ -128,7 +137,9 @@ def __init__(self, 
verbose=False, workspace=8): self.builder = trt.Builder(self.trt_logger) self.config = self.builder.create_builder_config() - self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace * (2 ** 30)) + self.config.set_memory_pool_limit( + trt.MemoryPoolType.WORKSPACE, workspace * (2**30) + ) self.batch_size = None self.network = None @@ -157,9 +168,17 @@ def create_network(self, onnx_path): log.info("Network Description") for input in inputs: self.batch_size = input.shape[0] - log.info("Input '{}' with shape {} and dtype {}".format(input.name, input.shape, input.dtype)) + log.info( + "Input '{}' with shape {} and dtype {}".format( + input.name, input.shape, input.dtype + ) + ) for output in outputs: - log.info("Output '{}' with shape {} and dtype {}".format(output.name, output.shape, output.dtype)) + log.info( + "Output '{}' with shape {} and dtype {}".format( + output.name, output.shape, output.dtype + ) + ) assert self.batch_size > 0 # TODO: These overrides are to improve fp16/int8 performance on FRCNN models @@ -167,17 +186,25 @@ def create_network(self, onnx_path): # type on the two NMS plugins. To be determined. 
for i in range(self.network.num_layers): if self.network.get_layer(i).name in [ - "FirstStageBoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/squeeze", - "FirstStageBoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/scale_value:0", - "FirstStageBoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/scale", - "nms/anchors:0"]: + "FirstStageBoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/squeeze", + "FirstStageBoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/scale_value:0", + "FirstStageBoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/scale", + "nms/anchors:0", + ]: self.network.get_layer(i).precision = trt.DataType.FLOAT - self.network.get_layer(i-1).precision = trt.DataType.FLOAT + self.network.get_layer(i - 1).precision = trt.DataType.FLOAT if self.network.get_layer(i).name == "FirstNMS/detection_boxes_conversion": self.network.get_layer(i).precision = trt.DataType.FLOAT - def create_engine(self, engine_path, precision, calib_input=None, calib_cache=None, calib_num_images=5000, - calib_batch_size=8): + def create_engine( + self, + engine_path, + precision, + calib_input=None, + calib_cache=None, + calib_num_images=5000, + calib_batch_size=8, + ): """ Build the TensorRT engine and serialize it to disk. :param engine_path: The path where to serialize the engine to. 
@@ -218,8 +245,14 @@ def create_engine(self, engine_path, precision, calib_input=None, calib_cache=No calib_shape = [calib_batch_size] + list(inputs[0].shape[1:]) calib_dtype = trt.nptype(inputs[0].dtype) self.config.int8_calibrator.set_image_batcher( - ImageBatcher(calib_input, calib_shape, calib_dtype, max_num_images=calib_num_images, - exact_batches=True)) + ImageBatcher( + calib_input, + calib_shape, + calib_dtype, + max_num_images=calib_num_images, + exact_batches=True, + ) + ) engine_bytes = self.builder.build_serialized_network(self.network, self.config) if engine_bytes is None: @@ -234,33 +267,68 @@ def create_engine(self, engine_path, precision, calib_input=None, calib_cache=No def main(args): builder = EngineBuilder(args.verbose, args.workspace) builder.create_network(args.onnx) - builder.create_engine(args.engine, args.precision, args.calib_input, args.calib_cache, args.calib_num_images, - args.calib_batch_size) + builder.create_engine( + args.engine, + args.precision, + args.calib_input, + args.calib_cache, + args.calib_num_images, + args.calib_batch_size, + ) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-o", "--onnx", help="The input ONNX model file to load") parser.add_argument("-e", "--engine", help="The output path for the TRT engine") - parser.add_argument("-p", "--precision", default="fp16", choices=["fp32", "fp16", "int8"], - help="The precision mode to build in, either 'fp32', 'fp16' or 'int8', default: 'fp16'") - parser.add_argument("-v", "--verbose", action="store_true", help="Enable more verbose log output") - parser.add_argument("-w", "--workspace", default=1, type=int, help="The max memory workspace size to allow in Gb, " - "default: 1") - parser.add_argument("--calib_input", help="The directory holding images to use for calibration") - parser.add_argument("--calib_cache", default="./calibration.cache", - help="The file path for INT8 calibration cache to use, default: ./calibration.cache") - 
parser.add_argument("--calib_num_images", default=5000, type=int, - help="The maximum number of images to use for calibration, default: 5000") - parser.add_argument("--calib_batch_size", default=8, type=int, - help="The batch size for the calibration process, default: 8") + parser.add_argument( + "-p", + "--precision", + default="fp16", + choices=["fp32", "fp16", "int8"], + help="The precision mode to build in, either 'fp32', 'fp16' or 'int8', default: 'fp16'", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="Enable more verbose log output" + ) + parser.add_argument( + "-w", + "--workspace", + default=1, + type=int, + help="The max memory workspace size to allow in Gb, " "default: 1", + ) + parser.add_argument( + "--calib_input", help="The directory holding images to use for calibration" + ) + parser.add_argument( + "--calib_cache", + default="./calibration.cache", + help="The file path for INT8 calibration cache to use, default: ./calibration.cache", + ) + parser.add_argument( + "--calib_num_images", + default=5000, + type=int, + help="The maximum number of images to use for calibration, default: 5000", + ) + parser.add_argument( + "--calib_batch_size", + default=8, + type=int, + help="The batch size for the calibration process, default: 8", + ) args = parser.parse_args() if not all([args.onnx, args.engine]): parser.print_help() log.error("These arguments are required: --onnx and --engine") sys.exit(1) - if args.precision == "int8" and not (args.calib_input or os.path.exists(args.calib_cache)): + if args.precision == "int8" and not ( + args.calib_input or os.path.exists(args.calib_cache) + ): parser.print_help() - log.error("When building in int8 precision, --calib_input or an existing --calib_cache file is required") + log.error( + "When building in int8 precision, --calib_input or an existing --calib_cache file is required" + ) sys.exit(1) main(args) diff --git a/samples/python/tensorflow_object_detection_api/compare_tf.py 
b/samples/python/tensorflow_object_detection_api/compare_tf.py index 409aec6b..ae5168eb 100644 --- a/samples/python/tensorflow_object_detection_api/compare_tf.py +++ b/samples/python/tensorflow_object_detection_api/compare_tf.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,6 +27,7 @@ from image_batcher import ImageBatcher from visualize import visualize_detections, concat_visualizations + class TensorFlowInfer: """ Implements TensorFlow inference of a saved model, following the same API as the TensorRTInfer class. @@ -36,45 +37,49 @@ def __init__(self, saved_model_path, preprocessor, detection_type, iou_threshold self.preprocessor = preprocessor self.detection_type = detection_type self.iou_threshold = iou_threshold - gpus = tf.config.experimental.list_physical_devices('GPU') + gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True) self.model = tf.saved_model.load(saved_model_path) - self.pred_fn = self.model.signatures['serving_default'] + self.pred_fn = self.model.signatures["serving_default"] # Setup I/O bindings self.inputs = [] fn_inputs = self.pred_fn.structured_input_signature[1] for i, input in enumerate(list(fn_inputs.values())): - self.inputs.append({ - 'index': i, - 'name': input.name, - 'dtype': np.dtype(input.dtype.as_numpy_dtype()), - 'shape': [1, 512, 512, 3], # This can be overridden later - }) + self.inputs.append( + { + "index": i, + "name": input.name, + "dtype": np.dtype(input.dtype.as_numpy_dtype()), + "shape": [1, 512, 512, 3], # This can be overridden later + } + ) self.outputs = [] fn_outputs = self.pred_fn.structured_outputs for i, output in enumerate(list(fn_outputs.values())): 
- self.outputs.append({ - 'index': i, - 'name': output.name, - 'dtype': np.dtype(output.dtype.as_numpy_dtype()), - 'shape': output.shape.as_list(), - }) + self.outputs.append( + { + "index": i, + "name": output.name, + "dtype": np.dtype(output.dtype.as_numpy_dtype()), + "shape": output.shape.as_list(), + } + ) def override_input_shape(self, input, shape): - self.inputs[input]['shape'] = shape + self.inputs[input]["shape"] = shape def input_spec(self): - return self.inputs[0]['shape'], self.inputs[0]['dtype'] + return self.inputs[0]["shape"], self.inputs[0]["dtype"] def output_spec(self): - return self.outputs[0]['shape'], self.outputs[0]['dtype'] + return self.outputs[0]["shape"], self.outputs[0]["dtype"] def infer(self, batch, scales=None, nms_threshold=None): # Process I/O and execute the network - input = {self.inputs[0]['name']: tf.convert_to_tensor(batch)} + input = {self.inputs[0]["name"]: tf.convert_to_tensor(batch)} output = self.pred_fn(**input) # Extract the results depending on what kind of saved model this is @@ -82,24 +87,24 @@ def infer(self, batch, scales=None, nms_threshold=None): scores = None classes = None - assert output['num_detections'] - num = int(output['num_detections'].numpy().flatten()[0]) - boxes = output['detection_boxes'].numpy()[:, 0:num, :] - scores = output['detection_scores'].numpy()[:, 0:num] - classes = output['detection_classes'].numpy()[:, 0:num] + assert output["num_detections"] + num = int(output["num_detections"].numpy().flatten()[0]) + boxes = output["detection_boxes"].numpy()[:, 0:num, :] + scores = output["detection_scores"].numpy()[:, 0:num] + classes = output["detection_classes"].numpy()[:, 0:num] # One additional output for segmentation masks if "detection_masks" in output: - masks = output['detection_masks'].numpy()[:, 0:num] + masks = output["detection_masks"].numpy()[:, 0:num] # Process the results detections = [[]] - normalized = (np.max(boxes) < 2.0) + normalized = np.max(boxes) < 2.0 for n in 
range(scores.shape[1]): # Depending on preprocessor, box scaling will be slightly different. if self.preprocessor == "fixed_shape_resizer": if scores[0][n] == 0.0: break - scale_x = self.inputs[0]['shape'][1] if normalized else 1.0 - scale_y = self.inputs[0]['shape'][2] if normalized else 1.0 + scale_x = self.inputs[0]["shape"][1] if normalized else 1.0 + scale_y = self.inputs[0]["shape"][2] if normalized else 1.0 if scales: scale_x /= scales[0][0] @@ -107,11 +112,11 @@ def infer(self, batch, scales=None, nms_threshold=None): if nms_threshold and scores[0][n] < nms_threshold: continue # Depending on detection type you need slightly different data. - if self.detection_type == 'bbox': + if self.detection_type == "bbox": mask = None # Segmentation is only supported with Mask R-CNN, which has # fixed_shape_resizer as image_resizer (lookup pipeline.config) - elif self.detection_type == 'segmentation': + elif self.detection_type == "segmentation": # Select a mask mask = masks[0][n] # Slight scaling, to get binary masks after float32 -> uint8 @@ -124,7 +129,7 @@ def infer(self, batch, scales=None, nms_threshold=None): mask = None if scores[0][n] == 0.0: break - scale = self.inputs[0]['shape'][2] if normalized else 1.0 + scale = self.inputs[0]["shape"][2] if normalized else 1.0 if scales: scale /= scales[0] scale_y = scale @@ -132,15 +137,17 @@ def infer(self, batch, scales=None, nms_threshold=None): if nms_threshold and scores[0][n] < nms_threshold: continue # Append to detections - detections[0].append({ - 'ymin': boxes[0][n][0] * scale_y, - 'xmin': boxes[0][n][1] * scale_x, - 'ymax': boxes[0][n][2] * scale_y, - 'xmax': boxes[0][n][3] * scale_x, - 'score': scores[0][n], - 'class': int(classes[0][n]) - 1, - 'mask': mask, - }) + detections[0].append( + { + "ymin": boxes[0][n][0] * scale_y, + "xmin": boxes[0][n][1] * scale_x, + "ymax": boxes[0][n][2] * scale_y, + "xmax": boxes[0][n][3] * scale_x, + "score": scores[0][n], + "class": int(classes[0][n]) - 1, + "mask": mask, + 
} + ) return detections @@ -150,7 +157,12 @@ def run(batcher, inferer, framework, nms_threshold=None): for batch, images, scales in batcher.get_batch(): res_detections += inferer.infer(batch, scales, nms_threshold) res_images += images - print("Processing {} / {} images ({})".format(batcher.image_index, batcher.num_images, framework), end="\r") + print( + "Processing {} / {} images ({})".format( + batcher.image_index, batcher.num_images, framework + ), + end="\r", + ) print() return res_images, res_detections @@ -159,34 +171,45 @@ def parse_annotations(annotations_path, detection_type): annotations = {} if annotations_path and os.path.exists(annotations_path): # Load annotations as coco, to extract segmentation masks - coco=COCO(annotations_path) + coco = COCO(annotations_path) with open(annotations_path) as f: ann_json = json.load(f) - for ann in ann_json['annotations']: - img_id = ann['image_id'] + for ann in ann_json["annotations"]: + img_id = ann["image_id"] if img_id not in annotations.keys(): annotations[img_id] = [] # Depending on detection type you need slightly different data. 
- if detection_type == 'bbox': + if detection_type == "bbox": mask = None # Segmentation is only supported with Mask R-CNN, which has # fixed_shape_resizer as image_resizer (lookup pipeline.config) - elif detection_type == 'segmentation': + elif detection_type == "segmentation": # Get np.array segmentation mask from annotation mask = coco.annToMask(ann) - annotations[img_id].append({ - 'ymin': ann['bbox'][1], - 'xmin': ann['bbox'][0], - 'ymax': ann['bbox'][1] + ann['bbox'][3], - 'xmax': ann['bbox'][0] + ann['bbox'][2], - 'score': -1, - 'class': ann['category_id'] - 1, - 'mask': mask, - }) + annotations[img_id].append( + { + "ymin": ann["bbox"][1], + "xmin": ann["bbox"][0], + "ymax": ann["bbox"][1] + ann["bbox"][3], + "xmax": ann["bbox"][0] + ann["bbox"][2], + "score": -1, + "class": ann["category_id"] - 1, + "mask": mask, + } + ) return annotations -def compare_images(tf_images, tf_detections, trt_images, trt_detections, output_dir, annotations_path, labels_path, detection_type): +def compare_images( + tf_images, + tf_detections, + trt_images, + trt_detections, + output_dir, + annotations_path, + labels_path, + detection_type, +): labels = [] if labels_path and os.path.exists(labels_path): with open(labels_path) as f: @@ -196,7 +219,9 @@ def compare_images(tf_images, tf_detections, trt_images, trt_detections, output_ annotations = parse_annotations(annotations_path, detection_type) count = 1 - for tf_img, tf_det, trt_img, trt_det in zip(tf_images, tf_detections, trt_images, trt_detections): + for tf_img, tf_det, trt_img, trt_det in zip( + tf_images, tf_detections, trt_images, trt_detections + ): vis = [] names = [] colors = [] @@ -214,60 +239,142 @@ def compare_images(tf_images, tf_detections, trt_images, trt_detections, output_ if img_id.isnumeric(): img_id = int(img_id) if img_id in annotations.keys(): - vis.append(visualize_detections(trt_img, None, annotations[img_id], labels)) + vis.append( + visualize_detections(trt_img, None, annotations[img_id], labels) + ) 
names.append("Ground Truth") colors.append("RoyalBlue") else: - print("Image {} does not have a COCO annotation, skipping ground truth visualization".format(trt_img)) + print( + "Image {} does not have a COCO annotation, skipping ground truth visualization".format( + trt_img + ) + ) basename = os.path.splitext(os.path.basename(tf_img))[0] output_path = os.path.join(output_dir, "{}.compare.png".format(basename)) os.makedirs(output_dir, exist_ok=True) concat_visualizations(vis, names, colors, output_path) - print("Processing {} / {} images (Visualization)".format(count, len(tf_images)), end="\r") + print( + "Processing {} / {} images (Visualization)".format(count, len(tf_images)), + end="\r", + ) count += 1 print() def main(args): - tf_infer = TensorFlowInfer(args.saved_model, args.preprocessor, args.detection_type, args.iou_threshold) - trt_infer = TensorRTInfer(args.engine, args.preprocessor, args.detection_type, args.iou_threshold) - - trt_batcher = ImageBatcher(args.input, *trt_infer.input_spec(), max_num_images=args.num_images, preprocessor=args.preprocessor) - tf_infer.override_input_shape(0, [1, trt_batcher.height, trt_batcher.width, 3]) # Same size input in TF as TRT - tf_batcher = ImageBatcher(args.input, *tf_infer.input_spec(), max_num_images=args.num_images, preprocessor=args.preprocessor) - - tf_images, tf_detections = run(tf_batcher, tf_infer, "TensorFlow", args.nms_threshold) - trt_images, trt_detections = run(trt_batcher, trt_infer, "TensorRT", args.nms_threshold) - - compare_images(tf_images, tf_detections, trt_images, trt_detections, args.output, args.annotations, args.labels, args.detection_type) + tf_infer = TensorFlowInfer( + args.saved_model, args.preprocessor, args.detection_type, args.iou_threshold + ) + trt_infer = TensorRTInfer( + args.engine, args.preprocessor, args.detection_type, args.iou_threshold + ) + + trt_batcher = ImageBatcher( + args.input, + *trt_infer.input_spec(), + max_num_images=args.num_images, + preprocessor=args.preprocessor 
+ ) + tf_infer.override_input_shape( + 0, [1, trt_batcher.height, trt_batcher.width, 3] + ) # Same size input in TF as TRT + tf_batcher = ImageBatcher( + args.input, + *tf_infer.input_spec(), + max_num_images=args.num_images, + preprocessor=args.preprocessor + ) + + tf_images, tf_detections = run( + tf_batcher, tf_infer, "TensorFlow", args.nms_threshold + ) + trt_images, trt_detections = run( + trt_batcher, trt_infer, "TensorRT", args.nms_threshold + ) + + compare_images( + tf_images, + tf_detections, + trt_images, + trt_detections, + args.output, + args.annotations, + args.labels, + args.detection_type, + ) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with") - parser.add_argument("-m", "--saved_model", help="The TensorFlow saved model path to validate against") - parser.add_argument("-i", "--input", - help="The input to infer, either a single image path, or a directory of images") - parser.add_argument("-o", "--output", default=None, help="Directory where to save the visualization results") - parser.add_argument("-l", "--labels", default="./labels_coco.txt", - help="File to use for reading the class labels from, default: ./labels_coco.txt") - parser.add_argument("-a", "--annotations", default=None, - help="Set the path to the 'instances_val2017.json' file to use for COCO annotations, in which " - "case --input should point to the COCO val2017 dataset, default: not used") - parser.add_argument("-n", "--num_images", default=100, type=int, - help="The maximum number of images to visualize, default: 100") - parser.add_argument("-t", "--nms_threshold", type=float, - help="Override the score threshold for the NMS operation, if higher than the threshold in the model/engine.") - parser.add_argument("--iou_threshold", default=0.5, type=float, - help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. 
Pixel values more than threshold will become 1, less 0") - parser.add_argument("-d", "--detection_type", default="bbox", choices=["bbox", "segmentation"], - help="Detection type for COCO, either bbox or if you are using Mask R-CNN's instance segmentation - segmentation") - parser.add_argument("--preprocessor", default="fixed_shape_resizer", choices=["fixed_shape_resizer", "keep_aspect_ratio_resizer"], - help="Select the image preprocessor to use based on your pipeline.config, either 'fixed_shape_resizer' or 'keep_aspect_ratio_resizer', default: fixed_shape_resizer") + parser.add_argument( + "-m", + "--saved_model", + help="The TensorFlow saved model path to validate against", + ) + parser.add_argument( + "-i", + "--input", + help="The input to infer, either a single image path, or a directory of images", + ) + parser.add_argument( + "-o", + "--output", + default=None, + help="Directory where to save the visualization results", + ) + parser.add_argument( + "-l", + "--labels", + default="./labels_coco.txt", + help="File to use for reading the class labels from, default: ./labels_coco.txt", + ) + parser.add_argument( + "-a", + "--annotations", + default=None, + help="Set the path to the 'instances_val2017.json' file to use for COCO annotations, in which " + "case --input should point to the COCO val2017 dataset, default: not used", + ) + parser.add_argument( + "-n", + "--num_images", + default=100, + type=int, + help="The maximum number of images to visualize, default: 100", + ) + parser.add_argument( + "-t", + "--nms_threshold", + type=float, + help="Override the score threshold for the NMS operation, if higher than the threshold in the model/engine.", + ) + parser.add_argument( + "--iou_threshold", + default=0.5, + type=float, + help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. 
Pixel values more than threshold will become 1, less 0", + ) + parser.add_argument( + "-d", + "--detection_type", + default="bbox", + choices=["bbox", "segmentation"], + help="Detection type for COCO, either bbox or if you are using Mask R-CNN's instance segmentation - segmentation", + ) + parser.add_argument( + "--preprocessor", + default="fixed_shape_resizer", + choices=["fixed_shape_resizer", "keep_aspect_ratio_resizer"], + help="Select the image preprocessor to use based on your pipeline.config, either 'fixed_shape_resizer' or 'keep_aspect_ratio_resizer', default: fixed_shape_resizer", + ) args = parser.parse_args() - if not all([args.engine, args.saved_model, args.input, args.output, args.preprocessor]): + if not all( + [args.engine, args.saved_model, args.input, args.output, args.preprocessor] + ): parser.print_help() sys.exit(1) main(args) diff --git a/samples/python/tensorflow_object_detection_api/create_onnx.py b/samples/python/tensorflow_object_detection_api/create_onnx.py index 919cc8e6..fc75fa17 100644 --- a/samples/python/tensorflow_object_detection_api/create_onnx.py +++ b/samples/python/tensorflow_object_detection_api/create_onnx.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -32,7 +32,9 @@ from object_detection.utils import config_util except ImportError: print("Could not import TFOD modules. 
Maybe you did not install TFOD API") - print("Please install TensorFlow 2 Object Detection API, check https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2.md") + print( + "Please install TensorFlow 2 Object Detection API, check https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2.md" + ) sys.exit(1) import onnx_utils @@ -55,14 +57,19 @@ def __init__(self, saved_model_path, pipeline_config_path): assert os.path.exists(saved_model_path) # Use tf2onnx to convert saved model to an initial ONNX graph. - graph_def, inputs, outputs = tf_loader.from_saved_model(saved_model_path, None, None, "serve", - ["serving_default"]) + graph_def, inputs, outputs = tf_loader.from_saved_model( + saved_model_path, None, None, "serve", ["serving_default"] + ) log.info("Loaded saved model from {}".format(saved_model_path)) with tf.Graph().as_default() as tf_graph: tf.import_graph_def(graph_def, name="") with tf_loader.tf_session(graph=tf_graph): - onnx_graph = tfonnx.process_tf_graph(tf_graph, input_names=inputs, output_names=outputs, opset=11) - onnx_model = optimizer.optimize_graph(onnx_graph).make_model("Converted from {}".format(saved_model_path)) + onnx_graph = tfonnx.process_tf_graph( + tf_graph, input_names=inputs, output_names=outputs, opset=11 + ) + onnx_model = optimizer.optimize_graph(onnx_graph).make_model( + "Converted from {}".format(saved_model_path) + ) self.graph = gs.import_onnx(onnx_model) assert self.graph log.info("TF2ONNX graph created successfully") @@ -71,61 +78,140 @@ def __init__(self, saved_model_path, pipeline_config_path): self.graph.fold_constants() # Pipeline config parsing. - pipeline_config = config_util.get_configs_from_pipeline_file(pipeline_config_path) + pipeline_config = config_util.get_configs_from_pipeline_file( + pipeline_config_path + ) # Get input resolution. 
- self.height, self.width = config_util.get_spatial_image_size(config_util.get_image_resizer_config(pipeline_config["model"])) + self.height, self.width = config_util.get_spatial_image_size( + config_util.get_image_resizer_config(pipeline_config["model"]) + ) # If your model is SSD, get characteristics accordingly from pipeline.config file. if pipeline_config["model"].HasField("ssd"): # Getting model characteristics. self.model = str(pipeline_config["model"].ssd.feature_extractor.type) - self.first_stage_nms_score_threshold = float(pipeline_config["model"].ssd.post_processing.batch_non_max_suppression.score_threshold) - self.first_stage_nms_iou_threshold = float(pipeline_config["model"].ssd.post_processing.batch_non_max_suppression.iou_threshold) - self.first_stage_max_proposals = int(pipeline_config["model"].ssd.post_processing.batch_non_max_suppression.max_detections_per_class) + self.first_stage_nms_score_threshold = float( + pipeline_config[ + "model" + ].ssd.post_processing.batch_non_max_suppression.score_threshold + ) + self.first_stage_nms_iou_threshold = float( + pipeline_config[ + "model" + ].ssd.post_processing.batch_non_max_suppression.iou_threshold + ) + self.first_stage_max_proposals = int( + pipeline_config[ + "model" + ].ssd.post_processing.batch_non_max_suppression.max_detections_per_class + ) # If your model is Faster R-CNN get it's characteristics from pipeline.config file. elif pipeline_config["model"].HasField("faster_rcnn"): # Getting model characteristics. 
- self.model = str(pipeline_config["model"].faster_rcnn.feature_extractor.type) + self.model = str( + pipeline_config["model"].faster_rcnn.feature_extractor.type + ) self.num_classes = pipeline_config["model"].faster_rcnn.num_classes - self.first_stage_nms_score_threshold = float(pipeline_config["model"].faster_rcnn.first_stage_nms_score_threshold) - self.first_stage_nms_iou_threshold = float(pipeline_config["model"].faster_rcnn.first_stage_nms_iou_threshold) - self.first_stage_max_proposals = int(pipeline_config["model"].faster_rcnn.first_stage_max_proposals) - self.first_stage_crop_size = int(pipeline_config["model"].faster_rcnn.initial_crop_size) - self.second_stage_nms_score_threshold = float(pipeline_config["model"].faster_rcnn.second_stage_post_processing.batch_non_max_suppression.score_threshold) - self.second_stage_iou_threshold = float(pipeline_config["model"].faster_rcnn.second_stage_post_processing.batch_non_max_suppression.iou_threshold) + self.first_stage_nms_score_threshold = float( + pipeline_config["model"].faster_rcnn.first_stage_nms_score_threshold + ) + self.first_stage_nms_iou_threshold = float( + pipeline_config["model"].faster_rcnn.first_stage_nms_iou_threshold + ) + self.first_stage_max_proposals = int( + pipeline_config["model"].faster_rcnn.first_stage_max_proposals + ) + self.first_stage_crop_size = int( + pipeline_config["model"].faster_rcnn.initial_crop_size + ) + self.second_stage_nms_score_threshold = float( + pipeline_config[ + "model" + ].faster_rcnn.second_stage_post_processing.batch_non_max_suppression.score_threshold + ) + self.second_stage_iou_threshold = float( + pipeline_config[ + "model" + ].faster_rcnn.second_stage_post_processing.batch_non_max_suppression.iou_threshold + ) self.mask_height = None self.mask_width = None self.matmul_crop_and_resize = False # Check what kind of Crop and Resize operation is used - if pipeline_config["model"].faster_rcnn.HasField("use_matmul_crop_and_resize"): - self.matmul_crop_and_resize = 
pipeline_config["model"].faster_rcnn.use_matmul_crop_and_resize + if pipeline_config["model"].faster_rcnn.HasField( + "use_matmul_crop_and_resize" + ): + self.matmul_crop_and_resize = pipeline_config[ + "model" + ].faster_rcnn.use_matmul_crop_and_resize # If model is Mask R-CNN, get final instance segmentation masks resolution. - if pipeline_config["model"].faster_rcnn.second_stage_box_predictor.mask_rcnn_box_predictor.HasField("mask_height") and pipeline_config["model"].faster_rcnn.second_stage_box_predictor.mask_rcnn_box_predictor.HasField("mask_width"): - self.mask_height = int(pipeline_config["model"].faster_rcnn.second_stage_box_predictor.mask_rcnn_box_predictor.mask_height) - self.mask_width = int(pipeline_config["model"].faster_rcnn.second_stage_box_predictor.mask_rcnn_box_predictor.mask_width) + if pipeline_config[ + "model" + ].faster_rcnn.second_stage_box_predictor.mask_rcnn_box_predictor.HasField( + "mask_height" + ) and pipeline_config[ + "model" + ].faster_rcnn.second_stage_box_predictor.mask_rcnn_box_predictor.HasField( + "mask_width" + ): + self.mask_height = int( + pipeline_config[ + "model" + ].faster_rcnn.second_stage_box_predictor.mask_rcnn_box_predictor.mask_height + ) + self.mask_width = int( + pipeline_config[ + "model" + ].faster_rcnn.second_stage_box_predictor.mask_rcnn_box_predictor.mask_width + ) else: log.info("Given Model type is not supported") sys.exit(1) # List of supported models. 
- supported_models = ["ssd_mobilenet_v2_keras", "ssd_mobilenet_v1_fpn_keras", "ssd_mobilenet_v2_fpn_keras", "ssd_resnet50_v1_fpn_keras", - "ssd_resnet101_v1_fpn_keras", "ssd_resnet152_v1_fpn_keras", "faster_rcnn_resnet50_keras", "faster_rcnn_resnet101_keras", - "faster_rcnn_resnet152_keras", "faster_rcnn_inception_resnet_v2_keras"] + supported_models = [ + "ssd_mobilenet_v2_keras", + "ssd_mobilenet_v1_fpn_keras", + "ssd_mobilenet_v2_fpn_keras", + "ssd_resnet50_v1_fpn_keras", + "ssd_resnet101_v1_fpn_keras", + "ssd_resnet152_v1_fpn_keras", + "faster_rcnn_resnet50_keras", + "faster_rcnn_resnet101_keras", + "faster_rcnn_resnet152_keras", + "faster_rcnn_inception_resnet_v2_keras", + ] assert self.model in supported_models # Model characteristics. log.info("Model is {}".format(self.model)) log.info("Height is {}".format(self.height)) log.info("Width is {}".format(self.width)) - log.info("First NMS score threshold is {}".format(self.first_stage_nms_score_threshold)) - log.info("First NMS iou threshold is {}".format(self.first_stage_nms_iou_threshold)) + log.info( + "First NMS score threshold is {}".format( + self.first_stage_nms_score_threshold + ) + ) + log.info( + "First NMS iou threshold is {}".format(self.first_stage_nms_iou_threshold) + ) log.info("First NMS max proposals is {}".format(self.first_stage_max_proposals)) if "faster_rcnn" in self.model: log.info("Number of classes is {}".format(self.num_classes)) - log.info("Crop and Resize output size is {}".format(self.first_stage_crop_size)) - log.info("Second NMS score threshold is {}".format(self.second_stage_nms_score_threshold)) - log.info("Second NMS iou threshold is {}".format(self.second_stage_iou_threshold)) - log.info("Using MatMul Crop and Resize: {}".format(self.matmul_crop_and_resize)) + log.info( + "Crop and Resize output size is {}".format(self.first_stage_crop_size) + ) + log.info( + "Second NMS score threshold is {}".format( + self.second_stage_nms_score_threshold + ) + ) + log.info( + "Second NMS iou 
threshold is {}".format(self.second_stage_iou_threshold) + ) + log.info( + "Using MatMul Crop and Resize: {}".format(self.matmul_crop_and_resize) + ) if not (self.mask_height is None and self.mask_width is None): log.info("Mask height is {}".format(self.mask_height)) log.info("Mask width is {}".format(self.mask_width)) @@ -155,12 +241,16 @@ def sanitize(self): model = shape_inference.infer_shapes(model) self.graph = gs.import_onnx(model) except Exception as e: - log.info("Shape inference could not be performed at this time:\n{}".format(e)) + log.info( + "Shape inference could not be performed at this time:\n{}".format(e) + ) try: self.graph.fold_constants(fold_shapes=True) except TypeError as e: - log.error("This version of ONNX GraphSurgeon does not support folding shapes, please upgrade your " - "onnx_graphsurgeon module. Error:\n{}".format(e)) + log.error( + "This version of ONNX GraphSurgeon does not support folding shapes, please upgrade your " + "onnx_graphsurgeon module. Error:\n{}".format(e) + ) raise count_after = len(self.graph.nodes) @@ -189,11 +279,22 @@ def add_debug_output(self, debug): for n, name in enumerate(debug): if name not in tensors: log.warning("Could not find tensor '{}'".format(name)) - debug_tensor = gs.Variable(name="debug:{}".format(n), dtype=tensors[name].dtype) - debug_node = gs.Node(op="Identity", name="debug_{}".format(n), inputs=[tensors[name]], outputs=[debug_tensor]) + debug_tensor = gs.Variable( + name="debug:{}".format(n), dtype=tensors[name].dtype + ) + debug_node = gs.Node( + op="Identity", + name="debug_{}".format(n), + inputs=[tensors[name]], + outputs=[debug_tensor], + ) self.graph.nodes.append(debug_node) self.graph.outputs.append(debug_tensor) - log.info("Adding debug output '{}' for graph tensor '{}'".format(debug_tensor.name, name)) + log.info( + "Adding debug output '{}' for graph tensor '{}'".format( + debug_tensor.name, name + ) + ) def update_preprocessor(self, batch_size, input_format): """ @@ -208,46 +309,71 @@ 
def update_preprocessor(self, batch_size, input_format): assert input_format in ["NCHW", "NHWC"] input_shape = [None] * 4 if input_format == "NHWC": - input_shape = [self.batch_size, self.height, self.width, 3] + input_shape = [self.batch_size, self.height, self.width, 3] if input_format == "NCHW": - input_shape = [self.batch_size, 3, self.height, self.width] + input_shape = [self.batch_size, 3, self.height, self.width] self.graph.inputs[0].shape = input_shape self.graph.inputs[0].dtype = np.float32 self.graph.inputs[0].name = "input_tensor" self.sanitize() - log.info("ONNX graph input shape: {} [NCHW format set]".format(self.graph.inputs[0].shape)) + log.info( + "ONNX graph input shape: {} [NCHW format set]".format( + self.graph.inputs[0].shape + ) + ) # Find the initial nodes of the graph, whatever the input is first connected to, and disconnect them. - for node in [node for node in self.graph.nodes if self.graph.inputs[0] in node.inputs]: + for node in [ + node for node in self.graph.nodes if self.graph.inputs[0] in node.inputs + ]: node.inputs.clear() # Get input tensor. # Convert to NCHW format if needed. input_tensor = self.graph.inputs[0] if input_format == "NHWC": - input_tensor = self.graph.transpose("preprocessor/transpose", input_tensor, [0, 3, 1, 2]) + input_tensor = self.graph.transpose( + "preprocessor/transpose", input_tensor, [0, 3, 1, 2] + ) # Mobilenets' and inception's backbones preprocessor. 
- if 'mobilenet' in self.model or 'inception_resnet' in self.model: - mul_const = np.expand_dims(np.asarray([2 / 255], dtype=np.float32), axis=(0, 2, 3)) - sub_const = np.expand_dims(np.asarray([1], dtype=np.float32), axis=(0, 2, 3)) - mul_out = self.graph.op_with_const("Mul", "preprocessor/scale", input_tensor, mul_const) - sub_out = self.graph.op_with_const("Sub", "preprocessor/mean", mul_out, sub_const) + if "mobilenet" in self.model or "inception_resnet" in self.model: + mul_const = np.expand_dims( + np.asarray([2 / 255], dtype=np.float32), axis=(0, 2, 3) + ) + sub_const = np.expand_dims( + np.asarray([1], dtype=np.float32), axis=(0, 2, 3) + ) + mul_out = self.graph.op_with_const( + "Mul", "preprocessor/scale", input_tensor, mul_const + ) + sub_out = self.graph.op_with_const( + "Sub", "preprocessor/mean", mul_out, sub_const + ) # Resnet backbones' preprocessor. - elif 'resnet' in self.model: - sub_const = np.expand_dims(np.asarray([255 * 0.485, 255 * 0.456, 255 * 0.406], dtype=np.float32), axis=(0, 2, 3)) - sub_out = self.graph.op_with_const("Sub", "preprocessor/mean", input_tensor, sub_const) + elif "resnet" in self.model: + sub_const = np.expand_dims( + np.asarray([255 * 0.485, 255 * 0.456, 255 * 0.406], dtype=np.float32), + axis=(0, 2, 3), + ) + sub_out = self.graph.op_with_const( + "Sub", "preprocessor/mean", input_tensor, sub_const + ) # Backbone is not supported. else: - log.info("Given model's backbone is not supported, pre-processor algorithm can't be generated") + log.info( + "Given model's backbone is not supported, pre-processor algorithm can't be generated" + ) sys.exit(1) # Find first Conv node and connect preprocessor directly to it. 
conv_node = self.graph.find_node_by_op("Conv") - log.info("Found {} node '{}' as stem entry".format(conv_node.op, conv_node.name)) + log.info( + "Found {} node '{}' as stem entry".format(conv_node.op, conv_node.name) + ) conv_node.inputs[0] = sub_out[0] # Disconnect the last node in one of the preprocessing branches with first TensorListStack parent node. @@ -275,9 +401,17 @@ def find_head_end(self, head_name, descendant, end_op): # and the Box Net end node has the shape [batch_size, num_anchors, 4]. # These end nodes can be be found by searching for all end_op's operation nodes and checking if the node two # steps above in the graph has a name that begins with one of head_names for Class Net and Box Net respectively. - for node in [node for node in self.graph.nodes if node.op == descendant and head_name in node.name]: + for node in [ + node + for node in self.graph.nodes + if node.op == descendant and head_name in node.name + ]: target_node = self.graph.find_descendant_by_op(node, end_op) - log.info("Found {} node '{}' as the tip of {}".format(target_node.op, target_node.name, head_name)) + log.info( + "Found {} node '{}' as the tip of {}".format( + target_node.op, target_node.name, head_name + ) + ) return target_node def extract_anchors_tensor(self, split): @@ -314,14 +448,27 @@ def get_anchor(output_idx, op, depth=5): anchors_h = get_anchor(2, "Mul") anchors_w = get_anchor(3, "Mul") - batched_anchors = np.concatenate([anchors_y, anchors_x, anchors_h, anchors_w], axis=2) + batched_anchors = np.concatenate( + [anchors_y, anchors_x, anchors_h, anchors_w], axis=2 + ) # Identify num of anchors without repetitions. - num_anchors = int(batched_anchors.shape[1]/self.batch_size) + num_anchors = int(batched_anchors.shape[1] / self.batch_size) # Trim total number of anchors in order to not have copies introduced by growing number of batch_size. 
- anchors = batched_anchors[0:num_anchors,0:num_anchors] + anchors = batched_anchors[0:num_anchors, 0:num_anchors] return gs.Constant(name="nms/anchors:0", values=anchors) - def NMS(self, box_net_tensor, class_net_tensor, anchors_tensor, background_class, score_activation, iou_threshold, nms_score_threshold, user_threshold, nms_name=None): + def NMS( + self, + box_net_tensor, + class_net_tensor, + anchors_tensor, + background_class, + score_activation, + iou_threshold, + nms_score_threshold, + user_threshold, + nms_name=None, + ): # Helper function to create the NMS Plugin node with the selected inputs. # EfficientNMS_TRT TensorRT Plugin is suitable for our use case. # :param box_net_tensor: The box predictions from the Box Net. @@ -341,35 +488,53 @@ def NMS(self, box_net_tensor, class_net_tensor, anchors_tensor, background_class nms_name = "_" + nms_name # Set score threshold. - score_threshold = nms_score_threshold if user_threshold is None else user_threshold + score_threshold = ( + nms_score_threshold if user_threshold is None else user_threshold + ) # NMS Outputs. 
- nms_output_num_detections = gs.Variable(name="num_detections"+nms_name, dtype=np.int32, shape=[self.batch_size, 1]) - nms_output_boxes = gs.Variable(name="detection_boxes"+nms_name, dtype=np.float32, - shape=[self.batch_size, self.first_stage_max_proposals, 4]) - nms_output_scores = gs.Variable(name="detection_scores"+nms_name, dtype=np.float32, - shape=[self.batch_size, self.first_stage_max_proposals]) - nms_output_classes = gs.Variable(name="detection_classes"+nms_name, dtype=np.int32, - shape=[self.batch_size, self.first_stage_max_proposals]) + nms_output_num_detections = gs.Variable( + name="num_detections" + nms_name, dtype=np.int32, shape=[self.batch_size, 1] + ) + nms_output_boxes = gs.Variable( + name="detection_boxes" + nms_name, + dtype=np.float32, + shape=[self.batch_size, self.first_stage_max_proposals, 4], + ) + nms_output_scores = gs.Variable( + name="detection_scores" + nms_name, + dtype=np.float32, + shape=[self.batch_size, self.first_stage_max_proposals], + ) + nms_output_classes = gs.Variable( + name="detection_classes" + nms_name, + dtype=np.int32, + shape=[self.batch_size, self.first_stage_max_proposals], + ) - nms_outputs = [nms_output_num_detections, nms_output_boxes, nms_output_scores, nms_output_classes] + nms_outputs = [ + nms_output_num_detections, + nms_output_boxes, + nms_output_scores, + nms_output_classes, + ] # Plugin. 
self.graph.plugin( op="EfficientNMS_TRT", - name="nms/non_maximum_suppression"+nms_name, + name="nms/non_maximum_suppression" + nms_name, inputs=[box_net_tensor, class_net_tensor, anchors_tensor], outputs=nms_outputs, attrs={ - 'plugin_version': "1", - 'background_class': background_class, - 'max_output_boxes': self.first_stage_max_proposals, - 'score_threshold': max(0.01, score_threshold), - 'iou_threshold': iou_threshold, - 'score_activation': score_activation, - 'class_agnostic': False, - 'box_coding': 1, - } + "plugin_version": "1", + "background_class": background_class, + "max_output_boxes": self.first_stage_max_proposals, + "score_threshold": max(0.01, score_threshold), + "iou_threshold": iou_threshold, + "score_activation": score_activation, + "class_agnostic": False, + "box_coding": 1, + }, ) log.info("Created 'nms/non_maximum_suppression{}' NMS plugin".format(nms_name)) @@ -384,15 +549,26 @@ def CropAndResize(self, unsqeeze_input, relu_node_outputs, cnr_num): # CropAndResizePlugin requires 4th dimension of 1: [N, B, 4, 1], so # we need to add unsqeeze node to make tensor 4 dimensional. - unsqueeze_node = self.graph.unsqueeze("CNR/detection_boxes_unsqueeze_"+cnr_num, unsqeeze_input) + unsqueeze_node = self.graph.unsqueeze( + "CNR/detection_boxes_unsqueeze_" + cnr_num, unsqeeze_input + ) # CropAndResizePlugin's inputs feature_maps = relu_node_outputs rois = unsqueeze_node[0] # CropAndResize Outputs. - cnr_pfmap = gs.Variable(name="cnr/pfmap_"+cnr_num, dtype=np.float32, - shape=[self.batch_size, self.first_stage_max_proposals, feature_maps.shape[1], self.first_stage_crop_size, self.first_stage_crop_size]) + cnr_pfmap = gs.Variable( + name="cnr/pfmap_" + cnr_num, + dtype=np.float32, + shape=[ + self.batch_size, + self.first_stage_max_proposals, + feature_maps.shape[1], + self.first_stage_crop_size, + self.first_stage_crop_size, + ], + ) # Create the CropandResize Plugin node with the selected inputs. 
# Two inputs are given to the CropAndResize TensorRT node: @@ -400,19 +576,29 @@ def CropAndResize(self, unsqeeze_input, relu_node_outputs, cnr_num): # - The rois (clipped and normalized detection boxes resulting from NMS): [batch_size, featuremap, 4, 1] self.graph.plugin( op="CropAndResize", - name="cnr/crop_and_resize_"+cnr_num, + name="cnr/crop_and_resize_" + cnr_num, inputs=[feature_maps, rois], outputs=[cnr_pfmap], attrs={ - 'crop_width': self.first_stage_crop_size, - 'crop_height': self.first_stage_crop_size, - } + "crop_width": self.first_stage_crop_size, + "crop_height": self.first_stage_crop_size, + }, ) log.info("Created {} CropAndResize plugin".format(cnr_num)) # Reshape node that is preparing CropAndResize's pfmap output shape for MaxPool node that comes next. - reshape_shape = np.asarray([self.first_stage_max_proposals*self.batch_size, feature_maps.shape[1], self.first_stage_crop_size, self.first_stage_crop_size], dtype=np.int64) - reshape_node = self.graph.op_with_const("Reshape", "cnr/reshape_"+cnr_num, cnr_pfmap, reshape_shape) + reshape_shape = np.asarray( + [ + self.first_stage_max_proposals * self.batch_size, + feature_maps.shape[1], + self.first_stage_crop_size, + self.first_stage_crop_size, + ], + dtype=np.int64, + ) + reshape_node = self.graph.op_with_const( + "Reshape", "cnr/reshape_" + cnr_num, cnr_pfmap, reshape_shape + ) return reshape_node[0] @@ -423,7 +609,10 @@ def process_graph(self, first_nms_threshold=None, second_nms_threshold=None): :param first_nms_threshold: Override the 1st NMS score threshold value. If set to None, use the value in the graph. :param second_nms_threshold: Override the 2nd NMS score threshold value. If set to None, use the value in the graph. 
""" - def first_nms(background_class, score_activation, first_nms_threshold, nms_name=None): + + def first_nms( + background_class, score_activation, first_nms_threshold, nms_name=None + ): """ Updates the graph to replace the 1st NMS op by EfficientNMS_TRT TensorRT plugin node. :param background_class: Set EfficientNMS_TRT's background_class atribute. @@ -432,35 +621,67 @@ def first_nms(background_class, score_activation, first_nms_threshold, nms_name= :param nms_name: Set the NMS node name. """ # Supported models - ssd_models = ['ssd_mobilenet_v1_fpn_keras', 'ssd_mobilenet_v2_fpn_keras', 'ssd_resnet50_v1_fpn_keras', 'ssd_resnet101_v1_fpn_keras', 'ssd_resnet152_v1_fpn_keras'] - frcnn_models = ['faster_rcnn_resnet50_keras', 'faster_rcnn_resnet101_keras', 'faster_rcnn_resnet152_keras', 'faster_rcnn_inception_resnet_v2_keras'] + ssd_models = [ + "ssd_mobilenet_v1_fpn_keras", + "ssd_mobilenet_v2_fpn_keras", + "ssd_resnet50_v1_fpn_keras", + "ssd_resnet101_v1_fpn_keras", + "ssd_resnet152_v1_fpn_keras", + ] + frcnn_models = [ + "faster_rcnn_resnet50_keras", + "faster_rcnn_resnet101_keras", + "faster_rcnn_resnet152_keras", + "faster_rcnn_inception_resnet_v2_keras", + ] # Getting SSD's Class and Box Nets final tensors. if "ssd" in self.model: # Find the concat node at the end of the class net (multi-scale class predictor). 
- class_net_head_name = 'BoxPredictor/ConvolutionalClassHead_' if self.model == 'ssd_mobilenet_v2_keras' else 'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalClassHead' - class_net = self.find_head_end(class_net_head_name, "Transpose", "Concat") + class_net_head_name = ( + "BoxPredictor/ConvolutionalClassHead_" + if self.model == "ssd_mobilenet_v2_keras" + else "WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalClassHead" + ) + class_net = self.find_head_end( + class_net_head_name, "Transpose", "Concat" + ) # Final Class Net tensor - class_net_tensor = self.graph.slice(class_net_head_name+"/slicer", class_net.outputs[0], 1, 91, 2)[0] # Remove background class + class_net_tensor = self.graph.slice( + class_net_head_name + "/slicer", class_net.outputs[0], 1, 91, 2 + )[ + 0 + ] # Remove background class # Find the concat or squeeze node at the end of the box net (multi-scale localization predictor). - if self.model == 'ssd_mobilenet_v2_keras': - box_net_head_name = 'BoxPredictor/ConvolutionalBoxHead_' - box_net = self.find_head_end(box_net_head_name, "Transpose", "Squeeze") + if self.model == "ssd_mobilenet_v2_keras": + box_net_head_name = "BoxPredictor/ConvolutionalBoxHead_" + box_net = self.find_head_end( + box_net_head_name, "Transpose", "Squeeze" + ) else: - box_net_head_name = 'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalBoxHead' - box_net = self.find_head_end(box_net_head_name, "Transpose", "Concat") + box_net_head_name = "WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalBoxHead" + box_net = self.find_head_end( + box_net_head_name, "Transpose", "Concat" + ) box_net_output = box_net.outputs[0] # 0.1, 0.1, 0.2, 0.2 are localization head variance numbers, they scale box_net_output in order to get accurate coordinates. 
- variance_adj = np.expand_dims(np.asarray([0.1, 0.1, 0.2, 0.2], dtype=np.float32), axis=(0, 1)) + variance_adj = np.expand_dims( + np.asarray([0.1, 0.1, 0.2, 0.2], dtype=np.float32), axis=(0, 1) + ) # Final Box Net tensor. - box_net_tensor = self.graph.op_with_const("Mul", box_net_head_name+"/scale", box_net_output, variance_adj)[0] + box_net_tensor = self.graph.op_with_const( + "Mul", box_net_head_name + "/scale", box_net_output, variance_adj + )[0] # Getting Faster R-CNN's 1st Class and Box Nets tensors. elif "faster_rcnn" in self.model: # Identify Class Net and Box Net head names - head_names = ['FirstStageBoxPredictor/ConvolutionalClassHead_0/ClassPredictor','FirstStageBoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor'] + head_names = [ + "FirstStageBoxPredictor/ConvolutionalClassHead_0/ClassPredictor", + "FirstStageBoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor", + ] # Find the softmax node at the end of the class net (multi-scale class predictor). class_net = self.find_head_end(head_names[0], "Transpose", "Softmax") @@ -472,12 +693,18 @@ def first_nms(background_class, score_activation, first_nms_threshold, nms_name= # Final Box Net tensor. box_net_output = box_net.outputs[0] - #Insert a squeeze node - squeeze_node = self.graph.squeeze(head_names[1]+"/squeeze", box_net_output) + # Insert a squeeze node + squeeze_node = self.graph.squeeze( + head_names[1] + "/squeeze", box_net_output + ) # 0.1, 0.1, 0.2, 0.2 are localization head variance numbers, they scale box_net_output, in order to get accurate coordinates. - variance_adj = np.expand_dims(np.asarray([0.1, 0.1, 0.2, 0.2], dtype=np.float32), axis=(0, 1)) + variance_adj = np.expand_dims( + np.asarray([0.1, 0.1, 0.2, 0.2], dtype=np.float32), axis=(0, 1) + ) # Final Box Net tensor. 
- box_net_tensor = self.graph.op_with_const("Mul", head_names[1]+"/scale", squeeze_node, variance_adj)[0] + box_net_tensor = self.graph.op_with_const( + "Mul", head_names[1] + "/scale", squeeze_node, variance_adj + )[0] # Find the split node that separates the box net coordinates and feeds them into the box decoder. box_net_split = self.graph.find_descendant_by_op(box_net, "Split") @@ -487,7 +714,17 @@ def first_nms(background_class, score_activation, first_nms_threshold, nms_name= anchors_tensor = self.extract_anchors_tensor(box_net_split) # Create NMS node. - nms_outputs = self.NMS(box_net_tensor, class_net_tensor, anchors_tensor, background_class, score_activation, self.first_stage_nms_iou_threshold, self.first_stage_nms_score_threshold, first_nms_threshold, nms_name) + nms_outputs = self.NMS( + box_net_tensor, + class_net_tensor, + anchors_tensor, + background_class, + score_activation, + self.first_stage_nms_iou_threshold, + self.first_stage_nms_score_threshold, + first_nms_threshold, + nms_name, + ) # Return NMS's outputs. return nms_outputs @@ -501,26 +738,47 @@ def first_cnr(input): # Locate the last Relu node of the first backbone (pre 1st NMS). Relu node contains feature maps # necessary for CropAndResize plugin. relu_name = "StatefulPartitionedCall/model/" - relu_node = [node for node in self.graph.nodes if node.op == "Relu" and relu_name in node.name][-1] + relu_node = [ + node + for node in self.graph.nodes + if node.op == "Relu" and relu_name in node.name + ][-1] # Before passing 1st NMS's detection boxes (rois) to CropAndResize, we need to clip and normalize them. # Clipping happens for coordinates that are less than 0 and more than self.height. # Normalization is just divison of every coordinate by self.height. 
- clip_out = self.graph.clip("FirstNMS/detection_boxes_clipper", input, 0, self.height) - div_const = np.expand_dims(np.asarray([self.height, self.width, self.height, self.width], dtype=np.float32), axis=(0, 1)) - div_out = self.graph.op_with_const("Div", "FirstNMS/detection_boxes_normalizer", clip_out[0], div_const) + clip_out = self.graph.clip( + "FirstNMS/detection_boxes_clipper", input, 0, self.height + ) + div_const = np.expand_dims( + np.asarray( + [self.height, self.width, self.height, self.width], dtype=np.float32 + ), + axis=(0, 1), + ) + div_out = self.graph.op_with_const( + "Div", "FirstNMS/detection_boxes_normalizer", clip_out[0], div_const + ) # Linear transformation to convert box coordinates from (TopLeft, BottomRight) Corner encoding # to CenterSize encoding. 1st NMS boxes are multiplied by transformation matrix in order to # encode it into CenterSize format. - matmul_const = np.matrix('0.5 0 -1 0; 0 0.5 0 -1; 0.5 0 1 0; 0 0.5 0 1', dtype=np.float32) - matmul_out = self.graph.matmul("FirstNMS/detection_boxes_conversion", div_out[0], matmul_const) + matmul_const = np.matrix( + "0.5 0 -1 0; 0 0.5 0 -1; 0.5 0 1 0; 0 0.5 0 1", dtype=np.float32 + ) + matmul_out = self.graph.matmul( + "FirstNMS/detection_boxes_conversion", div_out[0], matmul_const + ) # Create Crop and Resize node. cnr_output = self.CropAndResize(div_out, relu_node.outputs[0], "first") # Find MaxPool node that summarizes CropAndResize structure. - maxpool_node = [node for node in self.graph.nodes if node.op == "MaxPool" and "MaxPool2D/MaxPool" in node.name][0] + maxpool_node = [ + node + for node in self.graph.nodes + if node.op == "MaxPool" and "MaxPool2D/MaxPool" in node.name + ][0] maxpool_node.inputs[0] = cnr_output # Return linear transformation node, it will be located between 1st and 2nd NMS, @@ -528,7 +786,13 @@ def first_cnr(input): # In case you are converting Mask R-CNN, feature maps are required for 2nd CropAndResize. 
return matmul_out[0], relu_node.outputs[0] - def second_nms(background_class, score_activation, encoded_boxes, second_nms_threshold, nms_name=None): + def second_nms( + background_class, + score_activation, + encoded_boxes, + second_nms_threshold, + nms_name=None, + ): """ Updates the graph to replace the 2nd (or final) NMS op by EfficientNMS_TRT TensorRT plugin node. :param background_class: Set EfficientNMS_TRT's background_class atribute. @@ -539,14 +803,20 @@ def second_nms(background_class, score_activation, encoded_boxes, second_nms_thr """ # Identify Class Net and Box Net head names. - second_head_names = ['StatefulPartitionedCall/mask_rcnn_keras_box_predictor/mask_rcnn_class_head/ClassPredictor_dense', - 'StatefulPartitionedCall/mask_rcnn_keras_box_predictor/mask_rcnn_box_head/BoxEncodingPredictor_dense'] + second_head_names = [ + "StatefulPartitionedCall/mask_rcnn_keras_box_predictor/mask_rcnn_class_head/ClassPredictor_dense", + "StatefulPartitionedCall/mask_rcnn_keras_box_predictor/mask_rcnn_box_head/BoxEncodingPredictor_dense", + ] # Find the softmax node at the end of the 2nd class net (multi-scale class predictor). - second_class_net = self.find_head_end(second_head_names[0], "MatMul", "Softmax") + second_class_net = self.find_head_end( + second_head_names[0], "MatMul", "Softmax" + ) # Faster R-CNN's slice operation to adjust third dimension of Class Net's last node tensor (adjusting class values). - slice_out = self.graph.slice(second_head_names[0]+"/slicer", second_class_net.outputs[0], 1, 91, 2) + slice_out = self.graph.slice( + second_head_names[0] + "/slicer", second_class_net.outputs[0], 1, 91, 2 + ) # Final Class Net tensor. second_class_net_tensor = slice_out[0] @@ -561,19 +831,56 @@ def second_nms(background_class, score_activation, encoded_boxes, second_nms_thr # If use_matmul_crop_and_resize in pipeline.config is set to True, expect: [batch_size, first_stage_max_proposals, 4]. 
# Else use_matmul_crop_and_resize is either False or absent, expect: [batch_size, first_stage_max_proposals, num_classes, 4] if self.matmul_crop_and_resize: - reshape_shape_second = np.asarray([self.batch_size, self.first_stage_max_proposals, second_box_net.outputs[0].shape[1]], dtype=np.int64) + reshape_shape_second = np.asarray( + [ + self.batch_size, + self.first_stage_max_proposals, + second_box_net.outputs[0].shape[1], + ], + dtype=np.int64, + ) else: - reshape_shape_second = np.asarray([self.batch_size, self.first_stage_max_proposals, self.num_classes, second_box_net.outputs[0].shape[1]/self.num_classes], dtype=np.int64) - reshape_node_second = self.graph.op_with_const("Reshape", second_head_names[1]+"/reshape", second_box_net_output, reshape_shape_second) + reshape_shape_second = np.asarray( + [ + self.batch_size, + self.first_stage_max_proposals, + self.num_classes, + second_box_net.outputs[0].shape[1] / self.num_classes, + ], + dtype=np.int64, + ) + reshape_node_second = self.graph.op_with_const( + "Reshape", + second_head_names[1] + "/reshape", + second_box_net_output, + reshape_shape_second, + ) # 0.1, 0.1, 0.2, 0.2 are localization head variance numbers, they scale second_box_net_output, in order to get accurate coordinates. - second_scale_adj = np.expand_dims(np.asarray([0.1, 0.1, 0.2, 0.2], dtype=np.float32), axis=(0, 1)) - second_scale_out = self.graph.op_with_const("Mul", second_head_names[1]+"/scale_second", reshape_node_second[0], second_scale_adj) + second_scale_adj = np.expand_dims( + np.asarray([0.1, 0.1, 0.2, 0.2], dtype=np.float32), axis=(0, 1) + ) + second_scale_out = self.graph.op_with_const( + "Mul", + second_head_names[1] + "/scale_second", + reshape_node_second[0], + second_scale_adj, + ) # Final Box Net tensor. second_box_net_tensor = second_scale_out[0] # Create NMS node. 
- nms_outputs = self.NMS(second_box_net_tensor, second_class_net_tensor, encoded_boxes, background_class, score_activation, self.second_stage_iou_threshold, self.second_stage_nms_score_threshold, second_nms_threshold, nms_name) + nms_outputs = self.NMS( + second_box_net_tensor, + second_class_net_tensor, + encoded_boxes, + background_class, + score_activation, + self.second_stage_iou_threshold, + self.second_stage_nms_score_threshold, + second_nms_threshold, + nms_name, + ) return nms_outputs @@ -585,24 +892,36 @@ def second_cnr(feature_maps, second_nms_outputs): # Before passing 2nd NMS's detection boxes (rois) to second CropAndResize, we need to clip them. # Clipping happens for coordinates that are less than 0 and more than 1 (binary). - clip_out = self.graph.clip("SecondNMS/detection_boxes_clipper", second_nms_outputs[1], 0, 1) + clip_out = self.graph.clip( + "SecondNMS/detection_boxes_clipper", second_nms_outputs[1], 0, 1 + ) # Create Crop and Resize node. cnr_output = self.CropAndResize(clip_out, feature_maps, "second") # Find MaxPool node that summarizes CropAndResize structure - maxpool_node = [node for node in self.graph.nodes if node.op == "MaxPool" and "MaxPool2D/MaxPool_1" in node.name][0] + maxpool_node = [ + node + for node in self.graph.nodes + if node.op == "MaxPool" and "MaxPool2D/MaxPool_1" in node.name + ][0] maxpool_node.inputs[0] = cnr_output # Reshape node that is preparing 2nd NMS class outputs for Add node that comes next. 
# [self.batch_size, self.first_stage_max_proposals] -> [self.first_stage_max_proposals*self.batch_size] - class_reshape_shape = np.asarray([self.first_stage_max_proposals*self.batch_size], dtype=np.int64) - class_reshape_node = self.graph.op_with_const("Reshape", "Reshape_Class", second_nms_outputs[3], class_reshape_shape) + class_reshape_shape = np.asarray( + [self.first_stage_max_proposals * self.batch_size], dtype=np.int64 + ) + class_reshape_node = self.graph.op_with_const( + "Reshape", "Reshape_Class", second_nms_outputs[3], class_reshape_shape + ) # Find sigmoid node in the end of the network, applies sigmoid to get instance segmentation masks - last_sigmoid_node = self.graph.find_descendant_by_op(maxpool_node, "Sigmoid", 40) + last_sigmoid_node = self.graph.find_descendant_by_op( + maxpool_node, "Sigmoid", 40 + ) - if (self.num_classes > 1): + if self.num_classes > 1: # Find first ancestor of Sigmoid of operation type Add. This Add node is one of the Gather node inputs, # Gather node performs gather on 0th axis of data tensor and requires indices that set tesnors to be withing bounds, # this Add node provides the bounds for Gather. @@ -610,8 +929,21 @@ def second_cnr(feature_maps, second_nms_outputs): add_node.inputs[1] = class_reshape_node[0] # Final Reshape node, reshapes output of Sigmoid, important for various batch_size support. 
- final_reshape_shape = np.asarray([self.batch_size, self.first_stage_max_proposals, self.mask_height, self.mask_width], dtype=np.int64) - final_reshape_node = self.graph.op_with_const("Reshape", "Reshape_Final_Masks", last_sigmoid_node.outputs[0], final_reshape_shape) + final_reshape_shape = np.asarray( + [ + self.batch_size, + self.first_stage_max_proposals, + self.mask_height, + self.mask_width, + ], + dtype=np.int64, + ) + final_reshape_node = self.graph.op_with_const( + "Reshape", + "Reshape_Final_Masks", + last_sigmoid_node.outputs[0], + final_reshape_shape, + ) final_reshape_node[0].dtype = np.float32 final_reshape_node[0].name = "detection_masks" @@ -623,17 +955,27 @@ def second_cnr(feature_maps, second_nms_outputs): self.graph.outputs = first_nms(-1, True, first_nms_threshold) self.sanitize() # If your model is Faster R-CNN, you will need 2 NMS nodes with CropAndResize in between. - elif "faster_rcnn" in self.model and self.mask_height is None and self.mask_width is None: + elif ( + "faster_rcnn" in self.model + and self.mask_height is None + and self.mask_width is None + ): first_nms_outputs = first_nms(0, False, first_nms_threshold, "rpn") first_cnr_output, feature_maps = first_cnr(first_nms_outputs[1]) # Set graph outputs. 
- self.graph.outputs = second_nms(-1, False, first_cnr_output, second_nms_threshold) + self.graph.outputs = second_nms( + -1, False, first_cnr_output, second_nms_threshold + ) self.sanitize() # Mask R-CNN - elif "faster_rcnn" in self.model and not (self.mask_height is None and self.mask_width is None): + elif "faster_rcnn" in self.model and not ( + self.mask_height is None and self.mask_width is None + ): first_nms_outputs = first_nms(0, False, first_nms_threshold, "rpn") first_cnr_output, feature_maps = first_cnr(first_nms_outputs[1]) - second_nms_outputs = second_nms(-1, False, first_cnr_output, second_nms_threshold) + second_nms_outputs = second_nms( + -1, False, first_cnr_output, second_nms_threshold + ) second_cnr_output = second_cnr(feature_maps, second_nms_outputs) # Append segmentation head output. second_nms_outputs.append(second_cnr_output) @@ -655,20 +997,57 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("-p", "--pipeline_config", help="Pipeline configuration file to load", type=str) - parser.add_argument("-m", "--saved_model", help="The TensorFlow saved model directory to load", type=str) - parser.add_argument("-o", "--onnx", help="The output ONNX model file to write", type=str) - parser.add_argument("-b", "--batch_size", help="Batch size for the model", type=int, default=1) - parser.add_argument("-t1", "--first_nms_threshold", help="Override the score threshold for the 1st NMS operation", type=float) - parser.add_argument("-t2", "--second_nms_threshold", help="Override the score threshold for the 2nd NMS operation", type=float) - parser.add_argument("-d", "--debug", action='append', help="Add an extra output to debug a particular node") - parser.add_argument("-f", "--input_format", default="NHWC", choices=["NHWC", "NCHW"], - help="Set the input shape of the graph, as comma-separated dimensions in NCHW or NHWC format, default: NHWC") - parser.add_argument("--tf2onnx", help="The path where to save 
the intermediate ONNX graph generated by tf2onnx, " - "useful for debugging purposes, default: not saved", type=str) + parser.add_argument( + "-p", "--pipeline_config", help="Pipeline configuration file to load", type=str + ) + parser.add_argument( + "-m", + "--saved_model", + help="The TensorFlow saved model directory to load", + type=str, + ) + parser.add_argument( + "-o", "--onnx", help="The output ONNX model file to write", type=str + ) + parser.add_argument( + "-b", "--batch_size", help="Batch size for the model", type=int, default=1 + ) + parser.add_argument( + "-t1", + "--first_nms_threshold", + help="Override the score threshold for the 1st NMS operation", + type=float, + ) + parser.add_argument( + "-t2", + "--second_nms_threshold", + help="Override the score threshold for the 2nd NMS operation", + type=float, + ) + parser.add_argument( + "-d", + "--debug", + action="append", + help="Add an extra output to debug a particular node", + ) + parser.add_argument( + "-f", + "--input_format", + default="NHWC", + choices=["NHWC", "NCHW"], + help="Set the input shape of the graph, as comma-separated dimensions in NCHW or NHWC format, default: NHWC", + ) + parser.add_argument( + "--tf2onnx", + help="The path where to save the intermediate ONNX graph generated by tf2onnx, " + "useful for debugging purposes, default: not saved", + type=str, + ) args = parser.parse_args() if not all([args.pipeline_config, args.saved_model, args.onnx]): parser.print_help() - print("\nThese arguments are required: --pipeline_config, --saved_model and --onnx") + print( + "\nThese arguments are required: --pipeline_config, --saved_model and --onnx" + ) sys.exit(1) main(args) diff --git a/samples/python/tensorflow_object_detection_api/eval_coco.py b/samples/python/tensorflow_object_detection_api/eval_coco.py index 5086c660..f04c17f3 100644 --- a/samples/python/tensorflow_object_detection_api/eval_coco.py +++ b/samples/python/tensorflow_object_detection_api/eval_coco.py @@ -1,5 +1,5 @@ # -# 
SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,23 +24,35 @@ from infer import TensorRTInfer from image_batcher import ImageBatcher + def main(args): try: import object_detection.metrics.coco_tools as coco_tools except ImportError: - print("Could not import the 'object_detection.metrics.coco_tools' module from TFOD. Maybe you did not install TFOD API") - print("Please install TensorFlow 2 Object Detection API, check https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html") + print( + "Could not import the 'object_detection.metrics.coco_tools' module from TFOD. Maybe you did not install TFOD API" + ) + print( + "Please install TensorFlow 2 Object Detection API, check https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html" + ) sys.exit(1) - trt_infer = TensorRTInfer(args.engine, args.preprocessor, args.detection_type, args.iou_threshold) - batcher = ImageBatcher(args.input, *trt_infer.input_spec(), preprocessor=args.preprocessor) + trt_infer = TensorRTInfer( + args.engine, args.preprocessor, args.detection_type, args.iou_threshold + ) + batcher = ImageBatcher( + args.input, *trt_infer.input_spec(), preprocessor=args.preprocessor + ) # Read annotations json as dictionary. 
with open(args.annotations) as f: data = json.load(f) groundtruth = coco_tools.COCOWrapper(data, detection_type=args.detection_type) detections_list = [] for batch, images, scales in batcher.get_batch(): - print("Processing Image {} / {}".format(batcher.image_index, batcher.num_images), end="\r") + print( + "Processing Image {} / {}".format(batcher.image_index, batcher.num_images), + end="\r", + ) detections = trt_infer.infer(batch, scales, args.nms_threshold) for i in range(len(images)): # Get inference image resolution. @@ -49,43 +61,52 @@ def main(args): for n in range(len(detections[i])): source_id = int(os.path.splitext(os.path.basename(images[i]))[0]) det = detections[i][n] - if args.detection_type == 'bbox': + if args.detection_type == "bbox": coco_det = { - 'image_id': source_id, - 'category_id': det['class']+1, # adjust class num - 'bbox': [det['xmin'], det['ymin'], det['xmax'] - det['xmin'], det['ymax'] - det['ymin']], - 'score': det['score'] + "image_id": source_id, + "category_id": det["class"] + 1, # adjust class num + "bbox": [ + det["xmin"], + det["ymin"], + det["xmax"] - det["xmin"], + det["ymax"] - det["ymin"], + ], + "score": det["score"], } detections_list.append(coco_det) - elif args.detection_type == 'segmentation': + elif args.detection_type == "segmentation": # Get detection bbox resolution. - det_width = round(det['xmax'] - det['xmin']) - det_height = round(det['ymax'] - det['ymin']) + det_width = round(det["xmax"] - det["xmin"]) + det_height = round(det["ymax"] - det["ymin"]) # Create an image out of predicted mask array. - small_mask = Image.fromarray(det['mask']) + small_mask = Image.fromarray(det["mask"]) # Upsample mask to detection bbox's size. - mask = small_mask.resize((det_width, det_height), resample=Image.BILINEAR) + mask = small_mask.resize( + (det_width, det_height), resample=Image.BILINEAR + ) # Create an original image sized template for correct mask placement. 
pad = Image.new("L", (im_width, im_height)) # Place your mask according to detection bbox placement. - pad.paste(mask, (round(det['xmin']), (round(det['ymin'])))) + pad.paste(mask, (round(det["xmin"]), (round(det["ymin"])))) # Reconvert mask into numpy array for evaluation. padded_mask = np.array(pad) # Add one more dimension of 1, this is required by ExportSingleImageDetectionMasksToCoco. final_mask = padded_mask[np.newaxis, :, :] # Export detection mask to COCO format - coco_mask = coco_tools.ExportSingleImageDetectionMasksToCoco(image_id=source_id, - category_id_set=set(list(range(1,91))), - detection_classes=np.array([det['class']+1]), - detection_scores=np.array([det['score']]), - detection_masks=final_mask) + coco_mask = coco_tools.ExportSingleImageDetectionMasksToCoco( + image_id=source_id, + category_id_set=set(list(range(1, 91))), + detection_classes=np.array([det["class"] + 1]), + detection_scores=np.array([det["score"]]), + detection_masks=final_mask, + ) detections_list.append(coco_mask[0]) # Finish evalutions. 
detections = groundtruth.LoadAnnotations(detections_list) - if args.detection_type == 'bbox': + if args.detection_type == "bbox": evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, iou_type="bbox") - elif args.detection_type == 'segmentation': + elif args.detection_type == "segmentation": evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, iou_type="segm") evaluator.ComputeMetrics() @@ -93,20 +114,46 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with.") - parser.add_argument("-i", "--input", - help="The input to infer, either a single image path, or a directory of images.") - parser.add_argument("-d", "--detection_type", default="bbox", choices=["bbox", "segmentation"], - help="Detection type for COCO, either bbox or if you are using Mask R-CNN's instance segmentation - segmentation.") - parser.add_argument("-a", "--annotations", help="Set the json file to use for COCO instance annotations.") - parser.add_argument("-t", "--nms_threshold", type=float, - help="Override the score threshold for the NMS operation, if higher than the threshold in the engine.") - parser.add_argument("--iou_threshold", default=0.5, type=float, - help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. 
Pixel values more than threshold will become 1, less 0.") - parser.add_argument("--preprocessor", default="fixed_shape_resizer", choices=["fixed_shape_resizer", "keep_aspect_ratio_resizer"], - help="Select the image preprocessor to use based on your pipeline.config, either 'fixed_shape_resizer' or 'keep_aspect_ratio_resizer', default: fixed_shape_resizer.") + parser.add_argument( + "-i", + "--input", + help="The input to infer, either a single image path, or a directory of images.", + ) + parser.add_argument( + "-d", + "--detection_type", + default="bbox", + choices=["bbox", "segmentation"], + help="Detection type for COCO, either bbox or if you are using Mask R-CNN's instance segmentation - segmentation.", + ) + parser.add_argument( + "-a", + "--annotations", + help="Set the json file to use for COCO instance annotations.", + ) + parser.add_argument( + "-t", + "--nms_threshold", + type=float, + help="Override the score threshold for the NMS operation, if higher than the threshold in the engine.", + ) + parser.add_argument( + "--iou_threshold", + default=0.5, + type=float, + help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. 
Pixel values more than threshold will become 1, less 0.", + ) + parser.add_argument( + "--preprocessor", + default="fixed_shape_resizer", + choices=["fixed_shape_resizer", "keep_aspect_ratio_resizer"], + help="Select the image preprocessor to use based on your pipeline.config, either 'fixed_shape_resizer' or 'keep_aspect_ratio_resizer', default: fixed_shape_resizer.", + ) args = parser.parse_args() if not all([args.engine, args.input, args.annotations, args.preprocessor]): parser.print_help() - print("\nThese arguments are required: --engine --input --output and --preprocessor") + print( + "\nThese arguments are required: --engine --input --output and --preprocessor" + ) sys.exit(1) main(args) diff --git a/samples/python/tensorflow_object_detection_api/image_batcher.py b/samples/python/tensorflow_object_detection_api/image_batcher.py index c40e86c8..202e998d 100644 --- a/samples/python/tensorflow_object_detection_api/image_batcher.py +++ b/samples/python/tensorflow_object_detection_api/image_batcher.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,7 +27,15 @@ class ImageBatcher: Creates batches of pre-processed images. """ - def __init__(self, input, shape, dtype, max_num_images=None, exact_batches=False, preprocessor="fixed_shape_resizer"): + def __init__( + self, + input, + shape, + dtype, + max_num_images=None, + exact_batches=False, + preprocessor="fixed_shape_resizer", + ): """ :param input: The input directory to read images from. :param shape: The tensor shape of the batch to prepare, either in NCHW or NHWC format. 
@@ -45,10 +53,16 @@ def __init__(self, input, shape, dtype, max_num_images=None, exact_batches=False extensions = [".jpg", ".jpeg", ".png", ".bmp"] def is_image(path): - return os.path.isfile(path) and os.path.splitext(path)[1].lower() in extensions + return ( + os.path.isfile(path) and os.path.splitext(path)[1].lower() in extensions + ) if os.path.isdir(input): - self.images = [os.path.join(input, f) for f in os.listdir(input) if is_image(os.path.join(input, f))] + self.images = [ + os.path.join(input, f) + for f in os.listdir(input) + if is_image(os.path.join(input, f)) + ] self.images.sort() elif os.path.isfile(input): if is_image(input): @@ -85,7 +99,7 @@ def is_image(path): if self.num_images < 1: print("Not enough images to create batches") sys.exit(1) - self.images = self.images[0:self.num_images] + self.images = self.images[0 : self.num_images] # Subdivide the list of images into batches self.num_batches = 1 + int((self.num_images - 1) / self.batch_size) @@ -133,7 +147,10 @@ def resize_pad(image, pad_color=(0, 0, 0)): return image, scale elif self.preprocessor == "keep_aspect_ratio_resizer": scale = 1.0 / max(width_scale, height_scale) - image = image.resize((round(width * scale), round(height * scale)), resample=Image.BILINEAR) + image = image.resize( + (round(width * scale), round(height * scale)), + resample=Image.BILINEAR, + ) pad = Image.new("RGB", (self.width, self.height)) pad.paste(pad_color, [0, 0, self.width, self.height]) pad.paste(image) @@ -141,9 +158,12 @@ def resize_pad(image, pad_color=(0, 0, 0)): scale = None image = Image.open(image_path) - image = image.convert(mode='RGB') - if self.preprocessor == "fixed_shape_resizer" or self.preprocessor == "keep_aspect_ratio_resizer": - #Resize & Pad with ImageNet mean values and keep as [0,255] Normalization + image = image.convert(mode="RGB") + if ( + self.preprocessor == "fixed_shape_resizer" + or self.preprocessor == "keep_aspect_ratio_resizer" + ): + # Resize & Pad with ImageNet mean values and 
keep as [0,255] Normalization image, scale = resize_pad(image, (124, 116, 104)) image = np.asarray(image, dtype=self.dtype) else: diff --git a/samples/python/tensorflow_object_detection_api/infer.py b/samples/python/tensorflow_object_detection_api/infer.py index 3ea07863..298b7a0c 100644 --- a/samples/python/tensorflow_object_detection_api/infer.py +++ b/samples/python/tensorflow_object_detection_api/infer.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -28,6 +28,7 @@ from image_batcher import ImageBatcher from visualize import visualize_detections + class TensorRTInfer: """ Implements inference for the Model TensorRT engine. @@ -68,11 +69,11 @@ def __init__(self, engine_path, preprocessor, detection_type, iou_threshold): size *= s allocation = common.cuda_call(cudart.cudaMalloc(size)) binding = { - 'index': i, - 'name': name, - 'dtype': np.dtype(trt.nptype(dtype)), - 'shape': list(shape), - 'allocation': allocation, + "index": i, + "name": name, + "dtype": np.dtype(trt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, } self.allocations.append(allocation) if is_input: @@ -90,7 +91,7 @@ def input_spec(self): Get the specs for the input tensor of the network. Useful to prepare memory allocations. :return: Two items, the shape of the input tensor and its (numpy) datatype. 
""" - return self.inputs[0]['shape'], self.inputs[0]['dtype'] + return self.inputs[0]["shape"], self.inputs[0]["dtype"] def output_spec(self): """ @@ -99,7 +100,7 @@ def output_spec(self): """ specs = [] for o in self.outputs: - specs.append((o['shape'], o['dtype'])) + specs.append((o["shape"], o["dtype"])) return specs def infer(self, batch, scales=None, nms_threshold=None): @@ -117,10 +118,12 @@ def infer(self, batch, scales=None, nms_threshold=None): outputs.append(np.zeros(shape, dtype)) # Process I/O and execute the network - common.memcpy_host_to_device(self.inputs[0]['allocation'], np.ascontiguousarray(batch)) + common.memcpy_host_to_device( + self.inputs[0]["allocation"], np.ascontiguousarray(batch) + ) self.context.execute_v2(self.allocations) for o in range(len(outputs)): - common.memcpy_device_to_host(outputs[o], self.outputs[o]['allocation']) + common.memcpy_device_to_host(outputs[o], self.outputs[o]["allocation"]) # Process the results nums = outputs[0] @@ -131,14 +134,14 @@ def infer(self, batch, scales=None, nms_threshold=None): if len(outputs) == 5: masks = outputs[4] detections = [] - normalized = (np.max(boxes) < 2.0) + normalized = np.max(boxes) < 2.0 for i in range(self.batch_size): detections.append([]) for n in range(int(nums[i])): # Depending on preprocessor, box scaling will be slightly different. if self.preprocessor == "fixed_shape_resizer": - scale_x = self.inputs[0]['shape'][1] if normalized else 1.0 - scale_y = self.inputs[0]['shape'][2] if normalized else 1.0 + scale_x = self.inputs[0]["shape"][1] if normalized else 1.0 + scale_y = self.inputs[0]["shape"][2] if normalized else 1.0 if scales and i < len(scales): scale_x /= scales[i][0] @@ -146,11 +149,11 @@ def infer(self, batch, scales=None, nms_threshold=None): if nms_threshold and scores[i][n] < nms_threshold: continue # Depending on detection type you need slightly different data. 
- if self.detection_type == 'bbox': + if self.detection_type == "bbox": mask = None # Segmentation is only supported with Mask R-CNN, which has # fixed_shape_resizer as image_resizer (lookup pipeline.config) - elif self.detection_type == 'segmentation': + elif self.detection_type == "segmentation": # Select a mask mask = masks[i][n] # Slight scaling, to get binary masks after float32 -> uint8 @@ -161,7 +164,7 @@ def infer(self, batch, scales=None, nms_threshold=None): elif self.preprocessor == "keep_aspect_ratio_resizer": # No segmentation models with keep_aspect_ratio_resizer mask = None - scale = self.inputs[0]['shape'][2] if normalized else 1.0 + scale = self.inputs[0]["shape"][2] if normalized else 1.0 if scales and i < len(scales): scale /= scales[i] scale_y = scale @@ -169,15 +172,17 @@ def infer(self, batch, scales=None, nms_threshold=None): if nms_threshold and scores[i][n] < nms_threshold: continue # Append to detections - detections[i].append({ - 'ymin': boxes[i][n][0] * scale_y, - 'xmin': boxes[i][n][1] * scale_x, - 'ymax': boxes[i][n][2] * scale_y, - 'xmax': boxes[i][n][3] * scale_x, - 'score': scores[i][n], - 'class': int(classes[i][n]), - 'mask': mask, - }) + detections[i].append( + { + "ymin": boxes[i][n][0] * scale_y, + "xmin": boxes[i][n][1] * scale_x, + "ymax": boxes[i][n][2] * scale_y, + "xmax": boxes[i][n][3] * scale_x, + "score": scores[i][n], + "class": int(classes[i][n]), + "mask": mask, + } + ) return detections @@ -191,10 +196,17 @@ def main(args): for i, label in enumerate(f): labels.append(label.strip()) - trt_infer = TensorRTInfer(args.engine, args.preprocessor, args.detection_type, args.iou_threshold) - batcher = ImageBatcher(args.input, *trt_infer.input_spec(), preprocessor=args.preprocessor) + trt_infer = TensorRTInfer( + args.engine, args.preprocessor, args.detection_type, args.iou_threshold + ) + batcher = ImageBatcher( + args.input, *trt_infer.input_spec(), preprocessor=args.preprocessor + ) for batch, images, scales in 
batcher.get_batch(): - print("Processing Image {} / {}".format(batcher.image_index, batcher.num_images), end="\r") + print( + "Processing Image {} / {}".format(batcher.image_index, batcher.num_images), + end="\r", + ) detections = trt_infer.infer(batch, scales, args.nms_threshold) for i in range(len(images)): basename = os.path.splitext(os.path.basename(images[i]))[0] @@ -204,7 +216,14 @@ def main(args): # Text Results output_results = "" for d in detections[i]: - line = [d['xmin'], d['ymin'], d['xmax'], d['ymax'], d['score'], d['class']] + line = [ + d["xmin"], + d["ymin"], + d["xmax"], + d["ymax"], + d["score"], + d["class"], + ] output_results += "\t".join([str(f) for f in line]) + "\n" with open(os.path.join(args.output, "{}.txt".format(basename)), "w") as f: f.write(output_results) @@ -214,22 +233,54 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("-e", "--engine", default=None, help="The serialized TensorRT engine") - parser.add_argument("-i", "--input", default=None, help="Path to the image or directory to process") - parser.add_argument("-o", "--output", default=None, help="Directory where to save the visualization results") - parser.add_argument("-l", "--labels", default="./labels_coco.txt", - help="File to use for reading the class labels from, default: ./labels_coco.txt") - parser.add_argument("-d", "--detection_type", default="bbox", choices=["bbox", "segmentation"], - help="Detection type for COCO, either bbox or if you are using Mask R-CNN's instance segmentation - segmentation") - parser.add_argument("-t", "--nms_threshold", type=float, - help="Override the score threshold for the NMS operation, if higher than the threshold in the engine.") - parser.add_argument("--iou_threshold", default=0.5, type=float, - help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. 
Pixel values more than threshold will become 1, less 0") - parser.add_argument("--preprocessor", default="fixed_shape_resizer", choices=["fixed_shape_resizer", "keep_aspect_ratio_resizer"], - help="Select the image preprocessor to use based on your pipeline.config, either 'fixed_shape_resizer' or 'keep_aspect_ratio_resizer', default: fixed_shape_resizer") + parser.add_argument( + "-e", "--engine", default=None, help="The serialized TensorRT engine" + ) + parser.add_argument( + "-i", "--input", default=None, help="Path to the image or directory to process" + ) + parser.add_argument( + "-o", + "--output", + default=None, + help="Directory where to save the visualization results", + ) + parser.add_argument( + "-l", + "--labels", + default="./labels_coco.txt", + help="File to use for reading the class labels from, default: ./labels_coco.txt", + ) + parser.add_argument( + "-d", + "--detection_type", + default="bbox", + choices=["bbox", "segmentation"], + help="Detection type for COCO, either bbox or if you are using Mask R-CNN's instance segmentation - segmentation", + ) + parser.add_argument( + "-t", + "--nms_threshold", + type=float, + help="Override the score threshold for the NMS operation, if higher than the threshold in the engine.", + ) + parser.add_argument( + "--iou_threshold", + default=0.5, + type=float, + help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. 
Pixel values more than threshold will become 1, less 0", + ) + parser.add_argument( + "--preprocessor", + default="fixed_shape_resizer", + choices=["fixed_shape_resizer", "keep_aspect_ratio_resizer"], + help="Select the image preprocessor to use based on your pipeline.config, either 'fixed_shape_resizer' or 'keep_aspect_ratio_resizer', default: fixed_shape_resizer", + ) args = parser.parse_args() if not all([args.engine, args.input, args.output, args.preprocessor]): parser.print_help() - print("\nThese arguments are required: --engine --input --output and --preprocessor") + print( + "\nThese arguments are required: --engine --input --output and --preprocessor" + ) sys.exit(1) main(args) diff --git a/samples/python/tensorflow_object_detection_api/onnx_utils.py b/samples/python/tensorflow_object_detection_api/onnx_utils.py index b539197a..07819328 100644 --- a/samples/python/tensorflow_object_detection_api/onnx_utils.py +++ b/samples/python/tensorflow_object_detection_api/onnx_utils.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,6 +23,7 @@ logging.getLogger("SSDHelper").setLevel(logging.INFO) log = logging.getLogger("SSDHelper") + @gs.Graph.register() def op_with_const(self, op, name, input, value): """ @@ -35,7 +36,10 @@ def op_with_const(self, op, name, input, value): input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created {} node '{}': {}".format(op, name, value.squeeze())) const = gs.Constant(name="{}_value:0".format(name), values=value) - return self.layer(name=name, op=op, inputs=[input_tensor, const], outputs=[name + ":0"]) + return self.layer( + name=name, op=op, inputs=[input_tensor, const], outputs=[name + ":0"] + ) + @gs.Graph.register() def matmul(self, name, input, value): @@ -48,7 +52,10 @@ def matmul(self, name, input, value): input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created {} node '{}': {}".format("MatMul", name, value.squeeze())) const = gs.Constant(name="{}_value:0".format(name), values=value) - return self.layer(name=name, op="MatMul", inputs=[input_tensor, const], outputs=[name + ":0"]) + return self.layer( + name=name, op="MatMul", inputs=[input_tensor, const], outputs=[name + ":0"] + ) + @gs.Graph.register() def clip(self, name, input, clip_min, clip_max): @@ -61,9 +68,19 @@ def clip(self, name, input, clip_min, clip_max): """ input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created {} node '{}".format("Clip", name)) - const_min = gs.Constant(name="{}_value:0".format(name), values=np.asarray([clip_min], dtype=np.float32)) - const_max = gs.Constant(name="{}_value:1".format(name), values=np.asarray([clip_max], dtype=np.float32)) - return self.layer(name=name, op="Clip", inputs=[input_tensor, const_min, const_max], outputs=[name + ":0"]) + const_min = gs.Constant( + name="{}_value:0".format(name), values=np.asarray([clip_min], dtype=np.float32) + ) + const_max = 
gs.Constant( + name="{}_value:1".format(name), values=np.asarray([clip_max], dtype=np.float32) + ) + return self.layer( + name=name, + op="Clip", + inputs=[input_tensor, const_min, const_max], + outputs=[name + ":0"], + ) + @gs.Graph.register() def slice(self, name, input, starts, ends, axes): @@ -79,10 +96,22 @@ def slice(self, name, input, starts, ends, axes): input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created {} node '{}".format("Slice", name)) - const_start = gs.Constant(name="{}_value:0".format(name), values=np.asarray([starts], dtype=np.int64)) - const_end = gs.Constant(name="{}_value:1".format(name), values=np.asarray([ends], dtype=np.int64)) - const_axes = gs.Constant(name="{}_value:2".format(name), values=np.asarray([axes], dtype=np.int64)) - return self.layer(name=name, op="Slice", inputs=[input_tensor, const_start, const_end, const_axes], outputs=[name + ":0"]) + const_start = gs.Constant( + name="{}_value:0".format(name), values=np.asarray([starts], dtype=np.int64) + ) + const_end = gs.Constant( + name="{}_value:1".format(name), values=np.asarray([ends], dtype=np.int64) + ) + const_axes = gs.Constant( + name="{}_value:2".format(name), values=np.asarray([axes], dtype=np.int64) + ) + return self.layer( + name=name, + op="Slice", + inputs=[input_tensor, const_start, const_end, const_axes], + outputs=[name + ":0"], + ) + @gs.Graph.register() def unsqueeze(self, name, input, axes=[3]): @@ -96,7 +125,14 @@ def unsqueeze(self, name, input, axes=[3]): """ input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created Unsqueeze node '{}': {}".format(name, axes)) - return self.layer(name=name, op="Unsqueeze", inputs=[input_tensor], outputs=[name + ":0"], attrs={'axes': axes}) + return self.layer( + name=name, + op="Unsqueeze", + inputs=[input_tensor], + outputs=[name + ":0"], + attrs={"axes": axes}, + ) + @gs.Graph.register() def squeeze(self, name, input, axes=[2]): @@ -110,7 +146,14 @@ def squeeze(self, 
name, input, axes=[2]): """ input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created Squeeze node '{}': {}".format(name, axes)) - return self.layer(name=name, op="Squeeze", inputs=[input_tensor], outputs=[name + ":0"], attrs={'axes': axes}) + return self.layer( + name=name, + op="Squeeze", + inputs=[input_tensor], + outputs=[name + ":0"], + attrs={"axes": axes}, + ) + @gs.Graph.register() def transpose(self, name, input, perm): @@ -124,7 +167,14 @@ def transpose(self, name, input, perm): """ input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created Transpose node '{}': {}".format(name, perm)) - return self.layer(name=name, op="Transpose", inputs=[input_tensor], outputs=[name + ":0"], attrs={'perm': perm}) + return self.layer( + name=name, + op="Transpose", + inputs=[input_tensor], + outputs=[name + ":0"], + attrs={"perm": perm}, + ) + @gs.Graph.register() def sigmoid(self, name, input): @@ -137,7 +187,10 @@ def sigmoid(self, name, input): """ input_tensor = input if type(input) is gs.Variable else input[0] log.debug("Created Sigmoid node '{}'".format(name)) - return self.layer(name=name, op="Sigmoid", inputs=[input_tensor], outputs=[name + ":0"]) + return self.layer( + name=name, op="Sigmoid", inputs=[input_tensor], outputs=[name + ":0"] + ) + @gs.Graph.register() def plugin(self, op, name, inputs, outputs, attrs): @@ -154,7 +207,10 @@ def plugin(self, op, name, inputs, outputs, attrs): """ input_tensors = inputs if type(inputs) is list else [inputs] log.debug("Created TRT Plugin node '{}': {}".format(name, attrs)) - return self.layer(op=op, name=name, inputs=input_tensors, outputs=outputs, attrs=attrs) + return self.layer( + op=op, name=name, inputs=input_tensors, outputs=outputs, attrs=attrs + ) + @gs.Graph.register() def find_node_by_op(self, op): @@ -169,6 +225,7 @@ def find_node_by_op(self, op): return node return None + @gs.Graph.register() def find_descendant_by_op(self, node, op, depth=10): """ diff --git 
a/samples/python/tensorflow_object_detection_api/visualize.py b/samples/python/tensorflow_object_detection_api/visualize.py index f3e4ffc1..f88ed6f0 100644 --- a/samples/python/tensorflow_object_detection_api/visualize.py +++ b/samples/python/tensorflow_object_detection_api/visualize.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,6 +16,7 @@ # import numpy as np + np.set_printoptions(threshold=np.inf, suppress=True) import PIL.Image as Image @@ -24,95 +25,228 @@ import PIL.ImageFilter as ImageFilter - -COLORS = ['GoldenRod', 'MediumTurquoise', 'GreenYellow', 'SteelBlue', 'DarkSeaGreen', 'SeaShell', 'LightGrey', - 'IndianRed', 'DarkKhaki', 'LawnGreen', 'WhiteSmoke', 'Peru', 'LightCoral', 'FireBrick', 'OldLace', - 'LightBlue', 'SlateGray', 'OliveDrab', 'NavajoWhite', 'PaleVioletRed', 'SpringGreen', 'AliceBlue', 'Violet', - 'DeepSkyBlue', 'Red', 'MediumVioletRed', 'PaleTurquoise', 'Tomato', 'Azure', 'Yellow', 'Cornsilk', - 'Aquamarine', 'CadetBlue', 'CornflowerBlue', 'DodgerBlue', 'Olive', 'Orchid', 'LemonChiffon', 'Sienna', - 'OrangeRed', 'Orange', 'DarkSalmon', 'Magenta', 'Wheat', 'Lime', 'GhostWhite', 'SlateBlue', 'Aqua', - 'MediumAquaMarine', 'LightSlateGrey', 'MediumSeaGreen', 'SandyBrown', 'YellowGreen', 'Plum', 'FloralWhite', - 'LightPink', 'Thistle', 'DarkViolet', 'Pink', 'Crimson', 'Chocolate', 'DarkGrey', 'Ivory', 'PaleGreen', - 'DarkGoldenRod', 'LavenderBlush', 'SlateGrey', 'DeepPink', 'Gold', 'Cyan', 'LightSteelBlue', 'MediumPurple', - 'ForestGreen', 'DarkOrange', 'Tan', 'Salmon', 'PaleGoldenRod', 'LightGreen', 'LightSlateGray', 'HoneyDew', - 'Fuchsia', 'LightSeaGreen', 'DarkOrchid', 'Green', 'Chartreuse', 'LimeGreen', 'AntiqueWhite', 'Beige', - 'Gainsboro', 
'Bisque', 'SaddleBrown', 'Silver', 'Lavender', 'Teal', 'LightCyan', 'PapayaWhip', 'Purple', - 'Coral', 'BurlyWood', 'LightGray', 'Snow', 'MistyRose', 'PowderBlue', 'DarkCyan', 'White', 'Turquoise', - 'MediumSlateBlue', 'PeachPuff', 'Moccasin', 'LightSalmon', 'SkyBlue', 'Khaki', 'MediumSpringGreen', - 'BlueViolet', 'MintCream', 'Linen', 'SeaGreen', 'HotPink', 'LightYellow', 'BlanchedAlmond', 'RoyalBlue', - 'RosyBrown', 'MediumOrchid', 'DarkTurquoise', 'LightGoldenRodYellow', 'LightSkyBlue'] +COLORS = [ + "GoldenRod", + "MediumTurquoise", + "GreenYellow", + "SteelBlue", + "DarkSeaGreen", + "SeaShell", + "LightGrey", + "IndianRed", + "DarkKhaki", + "LawnGreen", + "WhiteSmoke", + "Peru", + "LightCoral", + "FireBrick", + "OldLace", + "LightBlue", + "SlateGray", + "OliveDrab", + "NavajoWhite", + "PaleVioletRed", + "SpringGreen", + "AliceBlue", + "Violet", + "DeepSkyBlue", + "Red", + "MediumVioletRed", + "PaleTurquoise", + "Tomato", + "Azure", + "Yellow", + "Cornsilk", + "Aquamarine", + "CadetBlue", + "CornflowerBlue", + "DodgerBlue", + "Olive", + "Orchid", + "LemonChiffon", + "Sienna", + "OrangeRed", + "Orange", + "DarkSalmon", + "Magenta", + "Wheat", + "Lime", + "GhostWhite", + "SlateBlue", + "Aqua", + "MediumAquaMarine", + "LightSlateGrey", + "MediumSeaGreen", + "SandyBrown", + "YellowGreen", + "Plum", + "FloralWhite", + "LightPink", + "Thistle", + "DarkViolet", + "Pink", + "Crimson", + "Chocolate", + "DarkGrey", + "Ivory", + "PaleGreen", + "DarkGoldenRod", + "LavenderBlush", + "SlateGrey", + "DeepPink", + "Gold", + "Cyan", + "LightSteelBlue", + "MediumPurple", + "ForestGreen", + "DarkOrange", + "Tan", + "Salmon", + "PaleGoldenRod", + "LightGreen", + "LightSlateGray", + "HoneyDew", + "Fuchsia", + "LightSeaGreen", + "DarkOrchid", + "Green", + "Chartreuse", + "LimeGreen", + "AntiqueWhite", + "Beige", + "Gainsboro", + "Bisque", + "SaddleBrown", + "Silver", + "Lavender", + "Teal", + "LightCyan", + "PapayaWhip", + "Purple", + "Coral", + "BurlyWood", + "LightGray", + "Snow", 
+ "MistyRose", + "PowderBlue", + "DarkCyan", + "White", + "Turquoise", + "MediumSlateBlue", + "PeachPuff", + "Moccasin", + "LightSalmon", + "SkyBlue", + "Khaki", + "MediumSpringGreen", + "BlueViolet", + "MintCream", + "Linen", + "SeaGreen", + "HotPink", + "LightYellow", + "BlanchedAlmond", + "RoyalBlue", + "RosyBrown", + "MediumOrchid", + "DarkTurquoise", + "LightGoldenRodYellow", + "LightSkyBlue", +] -#Overlay mask with transparency on top of the image. +# Overlay mask with transparency on top of the image. def overlay(image, mask, color, alpha_transparency=0.5): for channel in range(3): - image[:, :, channel] = np.where(mask == 1, - image[:, :, channel] * - (1 - alpha_transparency) + alpha_transparency * color[channel] * 255, - image[:, :, channel]) + image[:, :, channel] = np.where( + mask == 1, + image[:, :, channel] * (1 - alpha_transparency) + + alpha_transparency * color[channel] * 255, + image[:, :, channel], + ) return image + def visualize_detections(image_path, output_path, detections, labels=[]): - image = Image.open(image_path).convert(mode='RGB') + image = Image.open(image_path).convert(mode="RGB") # Get image dimensions. im_width, im_height = image.size line_width = 2 font = ImageFont.load_default() for d in detections: - color = COLORS[d['class'] % len(COLORS)] + color = COLORS[d["class"] % len(COLORS)] # Dynamically convert PIL color into RGB numpy array. - pixel_color = Image.new("RGB",(1, 1), color) + pixel_color = Image.new("RGB", (1, 1), color) # Normalize. - np_color = (np.asarray(pixel_color)[0][0])/255 + np_color = (np.asarray(pixel_color)[0][0]) / 255 # Process TF and TRT instance segmentation masks. - if isinstance(d['mask'], np.ndarray) and d['mask'].shape == (33, 33): + if isinstance(d["mask"], np.ndarray) and d["mask"].shape == (33, 33): # Get detection bbox resolution. 
- det_width = round(d['xmax'] - d['xmin']) - det_height = round(d['ymax'] - d['ymin']) + det_width = round(d["xmax"] - d["xmin"]) + det_height = round(d["ymax"] - d["ymin"]) # Create an image out of predicted mask array. - small_mask = Image.fromarray(d['mask']) + small_mask = Image.fromarray(d["mask"]) # Upsample mask to detection bbox's size. mask = small_mask.resize((det_width, det_height), resample=Image.BILINEAR) # Create an original image sized template for correct mask placement. pad = Image.new("L", (im_width, im_height)) # Place your mask according to detection bbox placement. - pad.paste(mask, (round(d['xmin']), (round(d['ymin'])))) + pad.paste(mask, (round(d["xmin"]), (round(d["ymin"])))) # Reconvert mask into numpy array for evaluation. padded_mask = np.array(pad) - #Creat np.array from original image, copy in order to modify. + # Creat np.array from original image, copy in order to modify. image_copy = np.asarray(image).copy() # Image with overlaid mask. masked_image = overlay(image_copy, padded_mask, np_color) # Reconvert back to PIL. image = Image.fromarray(masked_image) # Separate clause for ground truth instance segmentation masks. - elif isinstance(d['mask'], np.ndarray): - #Creat np.array from original image, copy in order to modify. + elif isinstance(d["mask"], np.ndarray): + # Creat np.array from original image, copy in order to modify. image_copy = np.asarray(image).copy() # Image with overlaid mask. - masked_image = overlay(image_copy, d['mask'], np_color) + masked_image = overlay(image_copy, d["mask"], np_color) # Reconvert back to PIL image = Image.fromarray(masked_image) # Bbox lines. 
draw = ImageDraw.Draw(image) - draw.line([(d['xmin'], d['ymin']), (d['xmin'], d['ymax']), (d['xmax'], d['ymax']), (d['xmax'], d['ymin']), - (d['xmin'], d['ymin'])], width=line_width, fill=color) - label = "Class {}".format(d['class']) - if d['class'] < len(labels): - label = "{}".format(labels[d['class']]) - score = d['score'] + draw.line( + [ + (d["xmin"], d["ymin"]), + (d["xmin"], d["ymax"]), + (d["xmax"], d["ymax"]), + (d["xmax"], d["ymin"]), + (d["xmin"], d["ymin"]), + ], + width=line_width, + fill=color, + ) + label = "Class {}".format(d["class"]) + if d["class"] < len(labels): + label = "{}".format(labels[d["class"]]) + score = d["score"] text = "{}: {}%".format(label, int(100 * score)) if score < 0: text = label left, top, right, bottom = font.getbbox(text) text_width, text_height = right - left, bottom - top - text_bottom = max(text_height, d['ymin']) - text_left = d['xmin'] + text_bottom = max(text_height, d["ymin"]) + text_left = d["xmin"] margin = np.ceil(0.05 * text_height) - draw.rectangle([(text_left, text_bottom - text_height - 2 * margin), (text_left + text_width, text_bottom)], - fill=color) - draw.text((text_left + margin, text_bottom - text_height - margin), text, fill='black', font=font) + draw.rectangle( + [ + (text_left, text_bottom - text_height - 2 * margin), + (text_left + text_width, text_bottom), + ], + fill=color, + ) + draw.text( + (text_left + margin, text_bottom - text_height - margin), + text, + fill="black", + font=font, + ) if output_path is None: return image image.save(output_path) @@ -123,7 +257,12 @@ def draw_text(draw, font, text, width, bar_height, offset, color): left, top, right, bottom = font.getbbox(text) text_width, text_height = right - left, bottom - top draw.rectangle([(offset, 0), (offset + width, bar_height)], fill=color) - draw.text((offset + (width - text_width) / 2, text_height - text_height / 2), text, fill='black', font=font) + draw.text( + (offset + (width - text_width) / 2, text_height - text_height / 2), + 
text, + fill="black", + font=font, + ) bar_height = 18 width = 0 @@ -132,7 +271,7 @@ def draw_text(draw, font, text, width, bar_height, offset, color): width += im.width height = max(height, im.height) - concat = Image.new('RGB', (width, height + bar_height)) + concat = Image.new("RGB", (width, height + bar_height)) draw = ImageDraw.Draw(concat) font = ImageFont.load_default() diff --git a/samples/python/yolov3_onnx/data_processing.py b/samples/python/yolov3_onnx/data_processing.py index 8a68145f..998cbc5f 100644 --- a/samples/python/yolov3_onnx/data_processing.py +++ b/samples/python/yolov3_onnx/data_processing.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,7 +31,9 @@ def load_label_categories(label_file_path): return categories -LABEL_FILE_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "coco_labels.txt") +LABEL_FILE_PATH = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "coco_labels.txt" +) ALL_CATEGORIES = load_label_categories(LABEL_FILE_PATH) # Let's make sure that there are 80 classes, as expected for the COCO data set: @@ -103,7 +105,14 @@ def _shuffle_and_normalize(self, image): class PostprocessYOLO(object): """Class for post-processing the three outputs tensors from YOLOv3-608.""" - def __init__(self, yolo_masks, yolo_anchors, obj_threshold, nms_threshold, yolo_input_resolution): + def __init__( + self, + yolo_masks, + yolo_anchors, + obj_threshold, + nms_threshold, + yolo_input_resolution, + ): """Initialize with all values that will be kept when processing several frames. Assuming 3 outputs of the network in the case of (large) YOLOv3. 
@@ -135,7 +144,9 @@ def process(self, outputs, resolution_raw): for output in outputs: outputs_reshaped.append(self._reshape_output(output)) - boxes, categories, confidences = self._process_yolo_output(outputs_reshaped, resolution_raw) + boxes, categories, confidences = self._process_yolo_output( + outputs_reshaped, resolution_raw + ) return boxes, categories, confidences @@ -311,8 +322,12 @@ def _nms_boxes(self, boxes, box_confidences): keep.append(i) xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]]) yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]]) - xx2 = np.minimum(x_coord[i] + width[i], x_coord[ordered[1:]] + width[ordered[1:]]) - yy2 = np.minimum(y_coord[i] + height[i], y_coord[ordered[1:]] + height[ordered[1:]]) + xx2 = np.minimum( + x_coord[i] + width[i], x_coord[ordered[1:]] + width[ordered[1:]] + ) + yy2 = np.minimum( + y_coord[i] + height[i], y_coord[ordered[1:]] + height[ordered[1:]] + ) width1 = np.maximum(0.0, xx2 - xx1 + 1) height1 = np.maximum(0.0, yy2 - yy1 + 1) diff --git a/samples/python/yolov3_onnx/onnx_to_tensorrt.py b/samples/python/yolov3_onnx/onnx_to_tensorrt.py index c7e54d16..2ba322bc 100644 --- a/samples/python/yolov3_onnx/onnx_to_tensorrt.py +++ b/samples/python/yolov3_onnx/onnx_to_tensorrt.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,23 +18,25 @@ from __future__ import print_function +import os +import sys + import numpy as np import tensorrt as trt - +from data_processing import ALL_CATEGORIES, PostprocessYOLO, PreprocessYOLO from PIL import ImageDraw -from data_processing import PreprocessYOLO, PostprocessYOLO, ALL_CATEGORIES - -import sys, os - sys.path.insert(1, os.path.join(sys.path[0], "..")) -import common from downloader import getFilePath +import common + TRT_LOGGER = trt.Logger() -def draw_bboxes(image_raw, bboxes, confidences, categories, all_categories, bbox_color="blue"): +def draw_bboxes( + image_raw, bboxes, confidences, categories, all_categories, bbox_color="blue" +): """Draw the bounding boxes on the original input image and return it. Keyword arguments: @@ -58,7 +60,11 @@ def draw_bboxes(image_raw, bboxes, confidences, categories, all_categories, bbox bottom = min(image_raw.height, np.floor(y_coord + height + 0.5).astype(int)) draw.rectangle(((left, top), (right, bottom)), outline=bbox_color) - draw.text((left, top - 12), "{0} {1:.2f}".format(all_categories[category], score), fill=bbox_color) + draw.text( + (left, top - 12), + "{0} {1:.2f}".format(all_categories[category], score), + fill=bbox_color, + ) return image_raw @@ -69,17 +75,21 @@ def get_engine(onnx_file_path, engine_file_path=""): def build_engine(): """Takes an ONNX file and creates a TensorRT engine to run inference with""" with trt.Builder(TRT_LOGGER) as builder, builder.create_network( - 0 + 0 ) as network, builder.create_builder_config() as config, trt.OnnxParser( network, TRT_LOGGER ) as parser, trt.Runtime( TRT_LOGGER ) as runtime: - config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 28) # 256MiB + config.set_memory_pool_limit( + trt.MemoryPoolType.WORKSPACE, 1 << 28 + ) # 256MiB # Parse model file if not os.path.exists(onnx_file_path): print( - "ONNX file {} not found, please run 
yolov3_to_onnx.py first to generate it.".format(onnx_file_path) + "ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.".format( + onnx_file_path + ) ) exit(0) print("Loading ONNX file from path {}...".format(onnx_file_path)) @@ -93,7 +103,11 @@ def build_engine(): # The actual yolov3.onnx is generated with batch size 64. Reshape input to batch size 1 network.get_input(0).shape = [1, 3, 608, 608] print("Completed parsing of ONNX file") - print("Building an engine from file {}; this may take a while...".format(onnx_file_path)) + print( + "Building an engine from file {}; this may take a while...".format( + onnx_file_path + ) + ) plan = builder.build_serialized_network(network, config) engine = runtime.deserialize_cuda_engine(plan) print("Completed creating Engine") @@ -131,19 +145,34 @@ def main(): output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)] # Do inference with TensorRT trt_outputs = [] - with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context: + with get_engine( + onnx_file_path, engine_file_path + ) as engine, engine.create_execution_context() as context: inputs, outputs, bindings, stream = common.allocate_buffers(engine) # Do inference print("Running inference on image {}...".format(input_image_path)) # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing. inputs[0].host = image - trt_outputs = common.do_inference(context, engine=engine, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) + trt_outputs = common.do_inference( + context, + engine=engine, + bindings=bindings, + inputs=inputs, + outputs=outputs, + stream=stream, + ) # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays. 
- trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)] + trt_outputs = [ + output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes) + ] postprocessor_args = { - "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)], # A list of 3 three-dimensional tuples for the YOLO masks + "yolo_masks": [ + (6, 7, 8), + (3, 4, 5), + (0, 1, 2), + ], # A list of 3 three-dimensional tuples for the YOLO masks "yolo_anchors": [ (10, 13), (16, 30), @@ -168,7 +197,11 @@ def main(): obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES) output_image_path = "dog_bboxes.png" obj_detected_img.save(output_image_path, "PNG") - print("Saved image with bounding boxes of detected objects to {}.".format(output_image_path)) + print( + "Saved image with bounding boxes of detected objects to {}.".format( + output_image_path + ) + ) # Free host and device memory used for inputs and outputs common.free_buffers(inputs, outputs, stream) diff --git a/samples/python/yolov3_onnx/yolov3_to_onnx.py b/samples/python/yolov3_onnx/yolov3_to_onnx.py index 59f8b3a6..ffd9d19f 100644 --- a/samples/python/yolov3_onnx/yolov3_to_onnx.py +++ b/samples/python/yolov3_onnx/yolov3_to_onnx.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -128,7 +128,9 @@ def _parse_params(self, param_line): param_value = layer_indexes elif isinstance(param_value_raw, str) and not param_value_raw.isalpha(): condition_param_value_positive = param_value_raw.isdigit() - condition_param_value_negative = param_value_raw[0] == "-" and param_value_raw[1:].isdigit() + condition_param_value_negative = ( + param_value_raw[0] == "-" and param_value_raw[1:].isdigit() + ) if condition_param_value_positive or condition_param_value_negative: param_value = int(param_value_raw) else: @@ -276,17 +278,29 @@ def load_conv_weights(self, conv_params): initializer = list() inputs = list() if conv_params.batch_normalize: - bias_init, bias_input = self._create_param_tensors(conv_params, "bn", "bias") - bn_scale_init, bn_scale_input = self._create_param_tensors(conv_params, "bn", "scale") - bn_mean_init, bn_mean_input = self._create_param_tensors(conv_params, "bn", "mean") - bn_var_init, bn_var_input = self._create_param_tensors(conv_params, "bn", "var") + bias_init, bias_input = self._create_param_tensors( + conv_params, "bn", "bias" + ) + bn_scale_init, bn_scale_input = self._create_param_tensors( + conv_params, "bn", "scale" + ) + bn_mean_init, bn_mean_input = self._create_param_tensors( + conv_params, "bn", "mean" + ) + bn_var_init, bn_var_input = self._create_param_tensors( + conv_params, "bn", "var" + ) initializer.extend([bn_scale_init, bias_init, bn_mean_init, bn_var_init]) inputs.extend([bn_scale_input, bias_input, bn_mean_input, bn_var_input]) else: - bias_init, bias_input = self._create_param_tensors(conv_params, "conv", "bias") + bias_init, bias_input = self._create_param_tensors( + conv_params, "conv", "bias" + ) initializer.append(bias_init) inputs.append(bias_input) - conv_init, conv_input = self._create_param_tensors(conv_params, "conv", "weights") + conv_init, conv_input = self._create_param_tensors( + conv_params, 
"conv", "weights" + ) initializer.append(conv_init) inputs.append(conv_input) return initializer, inputs @@ -299,7 +313,11 @@ def _open_weights_file(self, weights_file_path): """ weights_file = open(weights_file_path, "rb") length_header = 5 - np.ndarray(shape=(length_header,), dtype="int32", buffer=weights_file.read(length_header * 4)) + np.ndarray( + shape=(length_header,), + dtype="int32", + buffer=weights_file.read(length_header * 4), + ) return weights_file def _create_param_tensors(self, conv_params, param_category, suffix): @@ -312,10 +330,16 @@ def _create_param_tensors(self, conv_params, param_category, suffix): suffix -- a string determining the sub-type of above param_category (e.g., 'weights' or 'bias') """ - param_name, param_data, param_data_shape = self._load_one_param_type(conv_params, param_category, suffix) + param_name, param_data, param_data_shape = self._load_one_param_type( + conv_params, param_category, suffix + ) - initializer_tensor = helper.make_tensor(param_name, TensorProto.FLOAT, param_data_shape, param_data) - input_tensor = helper.make_tensor_value_info(param_name, TensorProto.FLOAT, param_data_shape) + initializer_tensor = helper.make_tensor( + param_name, TensorProto.FLOAT, param_data_shape, param_data + ) + input_tensor = helper.make_tensor_value_info( + param_name, TensorProto.FLOAT, param_data_shape + ) return initializer_tensor, input_tensor def _load_one_param_type(self, conv_params, param_category, suffix): @@ -337,7 +361,11 @@ def _load_one_param_type(self, conv_params, param_category, suffix): elif suffix == "bias": param_shape = [channels_out] param_size = np.product(np.array(param_shape)) - param_data = np.ndarray(shape=param_shape, dtype="float32", buffer=self.weights_file.read(param_size * 4)) + param_data = np.ndarray( + shape=param_shape, + dtype="float32", + buffer=self.weights_file.read(param_size * 4), + ) param_data = param_data.flatten().astype(float) return param_name, param_data, param_shape @@ -385,7 +413,9 @@ 
def build_onnx_graph(self, layer_configs, weights_file_path, verbose=True): output_dims = [ self.batch_size, ] + self.output_tensors[tensor_name] - output_tensor = helper.make_tensor_value_info(tensor_name, TensorProto.FLOAT, output_dims) + output_tensor = helper.make_tensor_value_info( + tensor_name, TensorProto.FLOAT, output_dims + ) outputs.append(output_tensor) inputs = [self.input_tensor] weight_loader = WeightLoader(weights_file_path) @@ -395,20 +425,30 @@ def build_onnx_graph(self, layer_configs, weights_file_path, verbose=True): _, layer_type = layer_name.split("_", 1) params = self.param_dict[layer_name] if layer_type == "convolutional": - initializer_layer, inputs_layer = weight_loader.load_conv_weights(params) + initializer_layer, inputs_layer = weight_loader.load_conv_weights( + params + ) initializer.extend(initializer_layer) inputs.extend(inputs_layer) elif layer_type == "upsample": - initializer_layer, inputs_layer = weight_loader.load_resize_scales(params) + initializer_layer, inputs_layer = weight_loader.load_resize_scales( + params + ) initializer.extend(initializer_layer) inputs.extend(inputs_layer) del weight_loader self.graph_def = helper.make_graph( - nodes=self._nodes, name="YOLOv3-608", inputs=inputs, outputs=outputs, initializer=initializer + nodes=self._nodes, + name="YOLOv3-608", + inputs=inputs, + outputs=outputs, + initializer=initializer, ) if verbose: print(helper.printable_graph(self.graph_def)) - model_def = helper.make_model(self.graph_def, producer_name="NVIDIA TensorRT sample") + model_def = helper.make_model( + self.graph_def, producer_name="NVIDIA TensorRT sample" + ) return model_def def _make_onnx_node(self, layer_name, layer_dict): @@ -423,8 +463,12 @@ def _make_onnx_node(self, layer_name, layer_dict): layer_type = layer_dict["type"] if self.input_tensor is None: if layer_type == "net": - major_node_output_name, major_node_output_channels = self._make_input_tensor(layer_name, layer_dict) - major_node_specs = 
MajorNodeSpecs(major_node_output_name, major_node_output_channels) + major_node_output_name, major_node_output_channels = ( + self._make_input_tensor(layer_name, layer_dict) + ) + major_node_specs = MajorNodeSpecs( + major_node_output_name, major_node_output_channels + ) else: raise ValueError('The first node has to be of type "net".') else: @@ -435,10 +479,17 @@ def _make_onnx_node(self, layer_name, layer_dict): node_creators["upsample"] = self._make_resize_node if layer_type in node_creators.keys(): - major_node_output_name, major_node_output_channels = node_creators[layer_type](layer_name, layer_dict) - major_node_specs = MajorNodeSpecs(major_node_output_name, major_node_output_channels) + major_node_output_name, major_node_output_channels = node_creators[ + layer_type + ](layer_name, layer_dict) + major_node_specs = MajorNodeSpecs( + major_node_output_name, major_node_output_channels + ) else: - print("Layer of type %s not supported, skipping ONNX node generation." % layer_type) + print( + "Layer of type %s not supported, skipping ONNX node generation." 
+ % layer_type + ) major_node_specs = MajorNodeSpecs(layer_name, None) return major_node_specs @@ -491,7 +542,10 @@ def _make_conv_node(self, layer_name, layer_dict): stride = layer_dict["stride"] filters = layer_dict["filters"] batch_normalize = False - if "batch_normalize" in layer_dict.keys() and layer_dict["batch_normalize"] == 1: + if ( + "batch_normalize" in layer_dict.keys() + and layer_dict["batch_normalize"] == 1 + ): batch_normalize = True kernel_shape = [kernel_size, kernel_size] @@ -542,7 +596,11 @@ def _make_conv_node(self, layer_name, layer_dict): layer_name_lrelu = layer_name + "_lrelu" lrelu_node = helper.make_node( - "LeakyRelu", inputs=inputs, outputs=[layer_name_lrelu], name=layer_name_lrelu, alpha=self.alpha_lrelu + "LeakyRelu", + inputs=inputs, + outputs=[layer_name_lrelu], + name=layer_name_lrelu, + alpha=self.alpha_lrelu, ) self._nodes.append(lrelu_node) inputs = [layer_name_lrelu] @@ -633,7 +691,9 @@ def _make_resize_node(self, layer_name, layer_dict): """ resize_scale_factors = float(layer_dict["stride"]) # Create the scale factor array with node parameters - scales = np.array([1.0, 1.0, resize_scale_factors, resize_scale_factors]).astype(np.float32) + scales = np.array( + [1.0, 1.0, resize_scale_factors, resize_scale_factors] + ).astype(np.float32) previous_node_specs = self._get_previous_node_specs() inputs = [previous_node_specs.name] diff --git a/samples/sampleAlgorithmSelector/CMakeLists.txt b/samples/sampleAlgorithmSelector/CMakeLists.txt index ef9386b3..3b30570c 100644 --- a/samples/sampleAlgorithmSelector/CMakeLists.txt +++ b/samples/sampleAlgorithmSelector/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleAlgorithmSelector/sampleAlgorithmSelector.cpp b/samples/sampleAlgorithmSelector/sampleAlgorithmSelector.cpp index 0072f761..02fd9975 100644 --- a/samples/sampleAlgorithmSelector/sampleAlgorithmSelector.cpp +++ b/samples/sampleAlgorithmSelector/sampleAlgorithmSelector.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleCharRNN/CMakeLists.txt b/samples/sampleCharRNN/CMakeLists.txt index 89d82682..d52245fb 100644 --- a/samples/sampleCharRNN/CMakeLists.txt +++ b/samples/sampleCharRNN/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleCharRNN/sampleCharRNN.cpp b/samples/sampleCharRNN/sampleCharRNN.cpp index 73ba53cc..8ddbb2ac 100644 --- a/samples/sampleCharRNN/sampleCharRNN.cpp +++ b/samples/sampleCharRNN/sampleCharRNN.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleDynamicReshape/CMakeLists.txt b/samples/sampleDynamicReshape/CMakeLists.txt index 374b5566..548e9bd5 100644 --- a/samples/sampleDynamicReshape/CMakeLists.txt +++ b/samples/sampleDynamicReshape/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleDynamicReshape/sampleDynamicReshape.cpp b/samples/sampleDynamicReshape/sampleDynamicReshape.cpp index d91b1a68..0f880509 100644 --- a/samples/sampleDynamicReshape/sampleDynamicReshape.cpp +++ b/samples/sampleDynamicReshape/sampleDynamicReshape.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleINT8API/CMakeLists.txt b/samples/sampleINT8API/CMakeLists.txt index e8eed5c3..00a6e82b 100644 --- a/samples/sampleINT8API/CMakeLists.txt +++ b/samples/sampleINT8API/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleINT8API/sampleINT8API.cpp b/samples/sampleINT8API/sampleINT8API.cpp index a20acff3..7cf6e819 100644 --- a/samples/sampleINT8API/sampleINT8API.cpp +++ b/samples/sampleINT8API/sampleINT8API.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleIOFormats/CMakeLists.txt b/samples/sampleIOFormats/CMakeLists.txt index 4ec93187..4640a2ff 100755 --- a/samples/sampleIOFormats/CMakeLists.txt +++ b/samples/sampleIOFormats/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,6 +16,11 @@ # SET(SAMPLE_SOURCES sampleIOFormats.cpp + ../common/sampleDevice.cpp + ../common/sampleEngines.cpp + ../common/sampleOptions.cpp + ../common/sampleUtils.cpp + ../common/bfloat16.cpp ) SET(SAMPLE_PARSERS "onnx") diff --git a/samples/sampleIOFormats/sampleIOFormats.cpp b/samples/sampleIOFormats/sampleIOFormats.cpp index 2c8b87af..9e167134 100644 --- a/samples/sampleIOFormats/sampleIOFormats.cpp +++ b/samples/sampleIOFormats/sampleIOFormats.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -33,6 +33,7 @@ #include "half.h" #include "logger.h" #include "parserOnnxConfig.h" +#include "sampleOptions.h" #include "NvInfer.h" #include "NvOnnxParser.h" @@ -144,6 +145,15 @@ class BufferDesc } }; +//! Specification for a network I/O tensor. +class TypeSpec +{ +public: + DataType dtype; //!< datatype + TensorFormat format; //!< format + std::string formatName; //!< name of the format +}; + class SampleBuffer { public: @@ -245,30 +255,14 @@ class SampleIOFormats bool build(int32_t dataWidth); //! - //! \brief Runs the TensorRT inference engine for this sample - //! - bool infer(SampleBuffer& inputBuf, SampleBuffer& outputBuf); - - //! - //! \brief Used to run CPU reference and get result - //! - bool reference(); - - //! - //! \brief Used to compare the CPU reference with the TRT result + //! \brief Verify the built engine I/O types and formats. //! - void compareResult(); + bool verify(TypeSpec const& spec); //! - //! \brief Reads the digit map from the file - //! - bool readDigits(SampleBuffer& buffer, int32_t groundTruthDigit); - - //! - //! \brief Verifies that the output is correct and prints it + //! \brief Runs the TensorRT inference engine for this sample //! - template - bool verifyOutput(SampleBuffer& outputBuf, int32_t groundTruthDigit) const; + bool infer(SampleBuffer& inputBuf, SampleBuffer& outputBuf); private: //! @@ -293,6 +287,62 @@ class SampleIOFormats int32_t mDigit; }; +//! +//! \brief Validates engine I/O datatypes and formats against a reference. +//! +//! \details This function queries I/O datatype and format description from the built engine. +//! Validating them is sufficient to ensure that `ITensor::setType` and `ITensor::setAllowedFormats` API as +//! expected. +//! +//! \return true if type and format validation succeeds. +//! 
+bool SampleIOFormats::verify(TypeSpec const& spec) +{ + assert(mEngine->getNbIOTensors() == 2); + char const* inputName = mEngine->getIOTensorName(0); + char const* outputName = mEngine->getIOTensorName(1); + + auto verifyType = [](DataType actual, DataType expected) { + if (actual != expected) + { + sample::gLogError << "Expected " << expected << " data type, got " << actual << std::endl; + return false; + } + return true; + }; + + if (!verifyType(mEngine->getTensorDataType(inputName), spec.dtype)) + { + return false; + } + + if (!verifyType(mEngine->getTensorDataType(outputName), spec.dtype)) + { + return false; + } + + auto verifyFormat = [](std::string const& actual, std::string const& expected) { /* actual is the engine's format description, expected is the format name; presumably the description embeds that name, e.g. "(kLINEAR)" - TODO confirm against getTensorFormatDesc output */ + if (actual.find(expected) == std::string::npos) /* fail only when the expected name is missing from the description */ + { + sample::gLogError << "Expected " << expected << " format, got " << actual << std::endl; + return false; + } + return true; + }; + + if (!verifyFormat(std::string(mEngine->getTensorFormatDesc(inputName)), spec.formatName)) + { + return false; + } + + if (!verifyFormat(std::string(mEngine->getTensorFormatDesc(outputName)), "kLINEAR")) /* the output buffer, not the input, is the one allocated as TensorFormat::kLINEAR */ + { + return false; + } + + return true; +} + //! //! \brief Creates the network, configures the builder and creates the network engine //! @@ -474,134 +524,6 @@ bool SampleIOFormats::infer(SampleBuffer& inputBuf, SampleBuffer& outputBuf) return true; } -//! -//! \brief Reads the digit map from file -//! -bool SampleIOFormats::readDigits(SampleBuffer& buffer, int32_t groundTruthDigit) -{ - int32_t const inputH = buffer.dims.d[2]; - int32_t const inputW = buffer.dims.d[3]; - - // Read a random digit file - std::vector fileData(inputH * inputW); - readPGMFile( - locateFile(std::to_string(groundTruthDigit) + ".pgm", mParams.dataDirs), fileData.data(), inputH, inputW); - - // Print ASCII representation of digit - for (int32_t i = 0; i < inputH * inputW; i++) - { - sample::gLogInfo << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % inputW) ?
"" : "\n"); - } - sample::gLogInfo << std::endl; - - float* inputBuf = reinterpret_cast(buffer.buffer); - - for (int32_t i = 0; i < inputH * inputW; i++) - { - inputBuf[i] = 1.0F - static_cast(fileData[i] / 255.0F); - } - - return true; -} - -//! -//! \brief Verifies that the output is correct and prints it -//! -template -bool SampleIOFormats::verifyOutput(SampleBuffer& outputBuf, int32_t groundTruthDigit) const -{ - T const* prob = reinterpret_cast(outputBuf.buffer); - - float val{0.0F}; - float elem{0.0F}; - int32_t idx{0}; - int32_t const kDIGITS = 10; - - for (int32_t i = 0; i < kDIGITS; i++) - { - elem = static_cast(prob[i]); - if (val < elem) - { - val = elem; - idx = i; - } - } - sample::gLogInfo << "Predicted Output: " << idx << std::endl; - - return (idx == groundTruthDigit && val > 0.9F); -} - -int32_t calcIndex(SampleBuffer& buffer, int32_t c, int32_t h, int32_t w) -{ - int32_t index; - - if (!buffer.desc.channelPivot) - { - index = c / buffer.desc.dims[4] * buffer.desc.dims[2] * buffer.desc.dims[3] * buffer.desc.dims[4] - + h * buffer.desc.dims[3] * buffer.desc.dims[4] + w * buffer.desc.dims[4] + c % buffer.desc.dims[4]; - } - else - { - index = h * buffer.desc.dims[3] * buffer.desc.dims[2] + w * buffer.desc.dims[3] + c; - } - - return index; -} - -//! -//! \brief Reformats the buffer. Src and dst buffers should be of same datatype and dims. -//! 
-template -void reformat(SampleBuffer& src, SampleBuffer& dst) -{ - if (src.format == dst.format) - { - memcpy(dst.buffer, src.buffer, src.getBufferSize()); - return; - } - - int32_t srcIndex, dstIndex; - - T* srcBuf = reinterpret_cast(src.buffer); - T* dstBuf = reinterpret_cast(dst.buffer); - - for (int32_t c = 0; c < src.dims.d[1]; c++) - { - for (int32_t h = 0; h < src.dims.d[2]; h++) - { - for (int32_t w = 0; w < src.dims.d[3]; w++) - { - srcIndex = calcIndex(src, c, h, w); - dstIndex = calcIndex(dst, c, h, w); - dstBuf[dstIndex] = srcBuf[srcIndex]; - } - } - } -} - -template -void convertGoldenData(SampleBuffer& goldenInput, SampleBuffer& dstInput) -{ - SampleBuffer tmpBuf(goldenInput.dims, sizeof(T), goldenInput.format, true); - - float* golden = reinterpret_cast(goldenInput.buffer); - T* tmp = reinterpret_cast(tmpBuf.buffer); - - for (int32_t i = 0; i < goldenInput.desc.getElememtSize(); i++) - { - if (std::is_same::value) - { - tmp[i] = static_cast(1 - ((1.0F - golden[i]) * 255.0F - 128) / 255.0F); - } - else - { - tmp[i] = static_cast(golden[i]); - } - } - - reformat(tmpBuf, dstInput); -} - //! //! \brief Initializes members of the params struct using the command line args //! @@ -644,67 +566,29 @@ void printHelpInfo() //! template bool process(SampleIOFormats& sample, sample::Logger::TestAtom const& sampleTest, SampleBuffer& inputBuf, - SampleBuffer& outputBuf, SampleBuffer& goldenInput) + SampleBuffer& outputBuf, TypeSpec& spec) { sample::gLogInfo << "Building and running a GPU inference engine with specified I/O formats." 
<< std::endl; - inputBuf = SampleBuffer(sample.mInputDims, sizeof(T), sample.mTensorFormat, true); - outputBuf = SampleBuffer(sample.mOutputDims, sizeof(T), TensorFormat::kLINEAR, false); if (!sample.build(sizeof(T))) { return false; } - convertGoldenData(goldenInput, inputBuf); - - if (!sample.infer(inputBuf, outputBuf)) - { - return false; - } - - if (!sample.verifyOutput(outputBuf, sample.mDigit)) - { - return false; - } - - return true; -} - -bool runFP32Reference(SampleIOFormats& sample, sample::Logger::TestAtom const& sampleTest, SampleBuffer& goldenInput, - SampleBuffer& goldenOutput) -{ - sample::gLogInfo << "Building and running a FP32 GPU inference to get golden input/output" << std::endl; - - if (!sample.build(sizeof(float))) + if (!sample.verify(spec)) { return false; } - goldenInput = SampleBuffer(sample.mInputDims, sizeof(float), TensorFormat::kLINEAR, true); - goldenOutput = SampleBuffer(sample.mOutputDims, sizeof(float), TensorFormat::kLINEAR, false); - - sample.readDigits(goldenInput, sample.mDigit); - - if (!sample.infer(goldenInput, goldenOutput)) - { - return false; - } + inputBuf = SampleBuffer(sample.mInputDims, sizeof(T), sample.mTensorFormat, true); + outputBuf = SampleBuffer(sample.mOutputDims, sizeof(T), TensorFormat::kLINEAR, false); - if (!sample.verifyOutput(goldenOutput, sample.mDigit)) + if (!sample.infer(inputBuf, outputBuf)) { return false; } - return true; } -//! Specification for a network I/O tensor. 
-class IOSpec -{ -public: - TensorFormat format; //!< format - std::string formatName; //!< name of the format -}; - int32_t main(int32_t argc, char** argv) { samplesCommon::Args args; @@ -727,56 +611,45 @@ int32_t main(int32_t argc, char** argv) samplesCommon::OnnxSampleParams params = initializeSampleParams(args); - std::vector vecFP16TensorFmt = { - IOSpec{TensorFormat::kLINEAR, "kLINEAR"}, - IOSpec{TensorFormat::kCHW2, "kCHW2"}, - IOSpec{TensorFormat::kHWC8, "kHWC8"}, - }; - std::vector vecINT8TensorFmt = { - IOSpec{TensorFormat::kLINEAR, "kLINEAR"}, - IOSpec{TensorFormat::kCHW4, "kCHW4"}, - IOSpec{TensorFormat::kCHW32, "kCHW32"}, + std::vector fp16TypeSpec = { + TypeSpec{DataType::kHALF, TensorFormat::kLINEAR, "kLINEAR"}, + TypeSpec{DataType::kHALF, TensorFormat::kCHW2, "kCHW2"}, + TypeSpec{DataType::kHALF, TensorFormat::kHWC8, "kHWC8"}, }; - SampleBuffer goldenInput, goldenOutput; + std::vector int8TypeSpec = { + TypeSpec{DataType::kINT8, TensorFormat::kLINEAR, "kLINEAR"}, + TypeSpec{DataType::kINT8, TensorFormat::kCHW4, "kCHW4"}, + TypeSpec{DataType::kINT8, TensorFormat::kCHW32, "kCHW32"}, + }; SampleIOFormats sample(params); - srand(unsigned(time(nullptr))); - sample.mDigit = rand() % 10; - - sample::gLogInfo << "The test chooses MNIST as the network and recognizes a randomly generated digit" << std::endl; sample::gLogInfo - << "Firstly it runs the FP32 as the golden data, then INT8/FP16 with different formats will be tested" - << std::endl + << "Build TRT engine with different IO data type and formats. 
Ensure that built engine abide by them" << std::endl; - if (!runFP32Reference(sample, sampleTest, goldenInput, goldenOutput)) - { - return sample::gLogger.reportFail(sampleTest); - } - // Test FP16 formats - for (auto spec : vecFP16TensorFmt) + for (auto spec : fp16TypeSpec) { sample::gLogInfo << "Testing datatype FP16 with format " << spec.formatName << std::endl; sample.mTensorFormat = spec.format; SampleBuffer inputBuf, outputBuf; - if (!process(sample, sampleTest, inputBuf, outputBuf, goldenInput)) + if (!process(sample, sampleTest, inputBuf, outputBuf, spec)) { return sample::gLogger.reportFail(sampleTest); } } // Test INT8 formats - for (auto spec : vecINT8TensorFmt) + for (auto spec : int8TypeSpec) { sample::gLogInfo << "Testing datatype INT8 with format " << spec.formatName << std::endl; sample.mTensorFormat = spec.format; SampleBuffer inputBuf, outputBuf; - if (!process(sample, sampleTest, inputBuf, outputBuf, goldenInput)) + if (!process(sample, sampleTest, inputBuf, outputBuf, spec)) { return sample::gLogger.reportFail(sampleTest); } diff --git a/samples/sampleNamedDimensions/CMakeLists.txt b/samples/sampleNamedDimensions/CMakeLists.txt index f03d19b1..21662668 100644 --- a/samples/sampleNamedDimensions/CMakeLists.txt +++ b/samples/sampleNamedDimensions/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleNamedDimensions/create_model.py b/samples/sampleNamedDimensions/create_model.py index e4146aa5..575bd4e6 100644 --- a/samples/sampleNamedDimensions/create_model.py +++ b/samples/sampleNamedDimensions/create_model.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleNamedDimensions/sampleNamedDimensions.cpp b/samples/sampleNamedDimensions/sampleNamedDimensions.cpp index 42298ba4..11e04841 100644 --- a/samples/sampleNamedDimensions/sampleNamedDimensions.cpp +++ b/samples/sampleNamedDimensions/sampleNamedDimensions.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleNonZeroPlugin/README.md b/samples/sampleNonZeroPlugin/README.md index 15e8e4c2..10e45109 100644 --- a/samples/sampleNonZeroPlugin/README.md +++ b/samples/sampleNonZeroPlugin/README.md @@ -16,7 +16,7 @@ ## Description This sample, sampleNonZeroPlugin, implements a plugin for the NonZero operation, customizable to output the non-zero indices in -either a row major (each set of indices in the same row) or column major format (each set of indices in the same column). +either a row order (each set of indices in the same row) or column order format (each set of indices in the same column). NonZero is an operation where the non-zero indices of the input tensor is found. @@ -36,7 +36,7 @@ Until `IPluginV3` (and associated interfaces), TensorRT plugins could not have o on input shapes). `IPluginV3OneBuild` which exposes a build capability for `IPluginV3`, provides support for such data-dependent output shapes. `NonZeroPlugin` in this sample is written to handle 2-D input tensors of shape $R \times C$. Assume that the tensor contains $K$ non-zero elements and that the -non-zero indices are required in a row-major order. 
Then the output shape would be $K \times 2$. +non-zero indices are required in a row ordering (each set of indices in its own row). Then the output shape would be $K \times 2$. The output shapes are expressed to the TensorRT builder through the `IPluginV3OneBuild::getOutputShapes()` API. Expressing the second dimension of the output is straightforward: @@ -70,7 +70,7 @@ and let's not forget to declare that the size tensor is a scalar (0-D): outputs[1].nbDims = 0; ``` -The `NonZeroPlugin` can also be configured to emit the non-zero indices in a column-major fashion through the `rowMajor` plugin attribute, by setting it to `0`. +The `NonZeroPlugin` can also be configured to emit the non-zero indices in a column-order fashion through the `rowOrder` plugin attribute, by setting it to `0`. In this case, the first output of the plugin will have shape $2 \times K$, and the output shape specification must be adjusted accordingly. ### Creating network and building the engine @@ -95,7 +95,7 @@ Download the sample data from the [TensorRT release tarball](https://developer.n 2. Run the sample to build and run the MNIST engine from the ONNX model. ``` - ./sample_non_zero_plugin [-h or --help] [-d or --datadir=] [--columnMajor] [--fp16] + ./sample_non_zero_plugin [-h or --help] [-d or --datadir=] [--columnOrder] [--fp16] ``` 3. Verify that the sample ran successfully. 
If the sample runs successfully you should see output similar to the following: diff --git a/samples/sampleNonZeroPlugin/nonZeroKernel.cu b/samples/sampleNonZeroPlugin/nonZeroKernel.cu index 7e015b2c..cdb4c615 100644 --- a/samples/sampleNonZeroPlugin/nonZeroKernel.cu +++ b/samples/sampleNonZeroPlugin/nonZeroKernel.cu @@ -17,8 +17,23 @@ #include "nonZeroKernel.h" +inline __device__ int32_t isZero(float const& a) +{ + return a == 0.F; +} + +inline __device__ int32_t isZero(half const& a) +{ +#if __CUDA_ARCH__ >= 530 + return a == __float2half(0.F); +#else + return __half2float(a) == 0.F; +#endif +} + +template __global__ void findNonZeroIndicesKernel( - float const* X, int32_t* indices, int32_t* count, int32_t const* K, int32_t R, int32_t C, bool rowMajor) + T const* X, int32_t* indices, int32_t* count, int32_t const* K, int32_t R, int32_t C, int32_t rowOrder) { int32_t col = blockIdx.x * blockDim.x + threadIdx.x; @@ -27,12 +42,12 @@ __global__ void findNonZeroIndicesKernel( { for (int32_t row = 0; row < R; ++row) { - if (X[row + R * col] != 0.F) + if (!isZero(X[row * C + col])) { int32_t index = atomicAdd(count, 1); // Increment count atomically and get the previous value if (indices) { - if(!rowMajor) + if(rowOrder == 0) { indices[index] = row; indices[index + *K] = col; @@ -48,11 +63,20 @@ __global__ void findNonZeroIndicesKernel( } } -void nonZeroIndicesImpl( - float const* X, int32_t* indices, int32_t* count, int32_t const* K, int32_t R, int32_t C, bool rowMajor, cudaStream_t stream) +template +void nonZeroIndicesImpl(T const* X, int32_t* indices, int32_t* count, int32_t const* K, int32_t R, int32_t C, + bool rowOrder, cudaStream_t stream) { constexpr int32_t kBLOCK_SIZE = 256; - int32_t const blocksPerGrid = (R + kBLOCK_SIZE - 1) / kBLOCK_SIZE; - - findNonZeroIndicesKernel<<>>(X, indices, count, K, R, C, rowMajor); + int32_t const blocksPerGrid = (C + kBLOCK_SIZE - 1) / kBLOCK_SIZE; + + findNonZeroIndicesKernel<<>>( + X, indices, count, K, R, C, 
static_cast(rowOrder)); } + +#define NONZERO_SPECIALIZED_IMPL(T) \ + template void nonZeroIndicesImpl(T const* X, int32_t* indices, int32_t* count, int32_t const* K, int32_t R, \ + int32_t C, bool rowOrder, cudaStream_t stream); + +NONZERO_SPECIALIZED_IMPL(float) +NONZERO_SPECIALIZED_IMPL(half) diff --git a/samples/sampleNonZeroPlugin/nonZeroKernel.h b/samples/sampleNonZeroPlugin/nonZeroKernel.h index 4dbb1ab0..c2f23c8e 100644 --- a/samples/sampleNonZeroPlugin/nonZeroKernel.h +++ b/samples/sampleNonZeroPlugin/nonZeroKernel.h @@ -16,9 +16,13 @@ */ #ifndef SAMPLE_NONZERO_KERNEL_H #define SAMPLE_NONZERO_KERNEL_H + +#include + #include -void nonZeroIndicesImpl(float const* X, int32_t* indices, int32_t* count, int32_t const* K, int32_t R, int32_t C, - bool rowMajor, cudaStream_t stream); +template +void nonZeroIndicesImpl(T const* X, int32_t* indices, int32_t* count, int32_t const* K, int32_t R, int32_t C, + bool rowOrder, cudaStream_t stream); #endif // SAMPLE_NONZERO_KERNEL_H diff --git a/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp b/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp index 47b5e7b3..40313f40 100644 --- a/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp +++ b/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp @@ -46,13 +46,34 @@ using samplesCommon::SampleUniquePtr; std::string const kSAMPLE_NAME = "TensorRT.sample_non_zero_plugin"; +using half = __half; + +void nonZeroIndicesHelper(nvinfer1::DataType type, void const* X, void* indices, void* count, void const* K, int32_t R, + int32_t C, bool rowOrder, cudaStream_t stream) +{ + if (type == nvinfer1::DataType::kFLOAT) + { + nonZeroIndicesImpl(static_cast(X), static_cast(indices), + static_cast(count), static_cast(K), R, C, rowOrder, stream); + } + else if (type == nvinfer1::DataType::kHALF) + { + nonZeroIndicesImpl(static_cast(X), static_cast(indices), + static_cast(count), static_cast(K), R, C, rowOrder, stream); + } + else + { + ASSERT(false && "Unsupported data type"); + } +} + class 
NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV3OneBuild, public IPluginV3OneRuntime { public: NonZeroPlugin(NonZeroPlugin const& p) = default; - NonZeroPlugin(bool rowMajor) - : mRowMajor(rowMajor) + NonZeroPlugin(bool rowOrder) + : mRowOrder(rowOrder) { initFieldsToSerialize(); } @@ -60,7 +81,7 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV void initFieldsToSerialize() { mDataToSerialize.clear(); - mDataToSerialize.emplace_back(PluginField("rowMajor", &mRowMajor, PluginFieldType::kINT32, 1)); + mDataToSerialize.emplace_back(PluginField("rowOrder", &mRowOrder, PluginFieldType::kINT32, 1)); mFCToSerialize.nbFields = mDataToSerialize.size(); mFCToSerialize.fields = mDataToSerialize.data(); } @@ -170,7 +191,7 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV auto optValue = exprBuilder.operation(DimensionOperation::kFLOOR_DIV, *upperBound, *exprBuilder.constant(2)); auto numNonZeroSizeTensor = exprBuilder.declareSizeTensor(1, *optValue, *upperBound); - if (!mRowMajor) + if (!mRowOrder) { outputs[0].d[0] = exprBuilder.constant(2); outputs[0].d[1] = numNonZeroSizeTensor; @@ -195,25 +216,29 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV int32_t const R = inputDesc[0].dims.d[0]; int32_t const C = inputDesc[0].dims.d[1]; + auto type = inputDesc[0].type; + + if (!(type == nvinfer1::DataType::kHALF || type == nvinfer1::DataType::kFLOAT)) + { + sample::gLogError << "Unsupported: Sample only supports DataType::kHALF and DataType::FLOAT" << std::endl; + return -1; + } + cudaMemsetAsync(outputs[1], 0, sizeof(int32_t), stream); - if (!mRowMajor) + if (!mRowOrder) { // When constructing a column major output, the kernel needs to be aware of the total number of non-zero // elements so as to write the non-zero indices at the correct places. 
Therefore, we will launch the kernel // twice: first, only to calculate the total non-zero count, which will be stored in workspace; and // then to actually write the non-zero indices to the outputs[0] buffer. cudaMemsetAsync(workspace, 0, sizeof(int32_t), stream); - nonZeroIndicesImpl(static_cast(inputs[0]), nullptr, static_cast(workspace), 0, R, C, - mRowMajor, stream); - - nonZeroIndicesImpl(static_cast(inputs[0]), static_cast(outputs[0]), - static_cast(outputs[1]), static_cast(workspace), R, C, mRowMajor, stream); + nonZeroIndicesHelper(type, inputs[0], nullptr, workspace, 0, R, C, mRowOrder, stream); + nonZeroIndicesHelper(type, inputs[0], outputs[0], outputs[1], workspace, R, C, mRowOrder, stream); } else { - nonZeroIndicesImpl(static_cast(inputs[0]), static_cast(outputs[0]), - static_cast(outputs[1]), 0, R, C, mRowMajor, stream); + nonZeroIndicesHelper(type, inputs[0], outputs[0], outputs[1], 0, R, C, mRowOrder, stream); } return 0; @@ -242,7 +267,7 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV } private: - bool mRowMajor{true}; + bool mRowOrder{true}; std::vector mDataToSerialize; nvinfer1::PluginFieldCollection mFCToSerialize; }; @@ -253,7 +278,7 @@ class NonZeroPluginCreator : public nvinfer1::IPluginCreatorV3One NonZeroPluginCreator() { mPluginAttributes.clear(); - mPluginAttributes.emplace_back(PluginField("rowMajor", nullptr, PluginFieldType::kINT32, 1)); + mPluginAttributes.emplace_back(PluginField("rowOrder", nullptr, PluginFieldType::kINT32, 1)); mFC.nbFields = mPluginAttributes.size(); mFC.fields = mPluginAttributes.data(); } @@ -277,16 +302,16 @@ class NonZeroPluginCreator : public nvinfer1::IPluginCreatorV3One { try { - bool rowMajor{true}; + bool rowOrder{true}; for (int32_t i = 0; i < fc->nbFields; ++i) { auto const fieldName(fc->fields[i].name); - if (std::strcmp(fieldName, "rowMajor") == 0) + if (std::strcmp(fieldName, "rowOrder") == 0) { - rowMajor = *static_cast(fc->fields[i].data); + rowOrder = 
*static_cast(fc->fields[i].data); } } - return new NonZeroPlugin(rowMajor); + return new NonZeroPlugin(rowOrder); } catch (std::exception const& e) { @@ -309,7 +334,7 @@ namespace { struct NonZeroParams : public samplesCommon::SampleParams { - bool rowMajor{true}; + bool rowOrder{true}; }; } // namespace @@ -465,7 +490,7 @@ bool SampleNonZeroPlugin::constructNetwork(SampleUniquePtr& auto* in = network->addInput("Input", DataType::kFLOAT, {2, {R, C}}); ASSERT(in != nullptr); - std::vector const vecPF{{"rowMajor", &mParams.rowMajor, PluginFieldType::kINT32, 1}}; + std::vector const vecPF{{"rowOrder", &mParams.rowOrder, PluginFieldType::kINT32, 1}}; PluginFieldCollection pfc{static_cast(vecPF.size()), vecPF.data()}; auto pluginCreator = static_cast(getPluginRegistry()->getCreator("NonZeroPlugin", "0", "")); @@ -579,7 +604,7 @@ bool SampleNonZeroPlugin::processInput(samplesCommon::BufferManager const& buffe { for (int32_t j = 0; j < inputW; ++j) { - sample::gLogInfo << hostDataBuffer[i + inputH * j]; + sample::gLogInfo << hostDataBuffer[i * inputW + j]; if (j < inputW - 1) { sample::gLogInfo << ", "; @@ -606,7 +631,7 @@ bool SampleNonZeroPlugin::verifyOutput(samplesCommon::BufferManager const& buffe std::vector covered(mInputDims.d[0] * mInputDims.d[1], false); sample::gLogInfo << "Output:" << std::endl; - if (mParams.rowMajor) + if (mParams.rowOrder) { for (int32_t i = 0; i < count; ++i) { @@ -629,11 +654,11 @@ bool SampleNonZeroPlugin::verifyOutput(samplesCommon::BufferManager const& buffe } } - if (!mParams.rowMajor) + if (!mParams.rowOrder) { for (int32_t i = 0; i < count; ++i) { - auto const idx = output[i] + mInputDims.d[0] * output[i + count]; + auto const idx = output[i] * mInputDims.d[1] + output[i + count]; covered[idx] = true; if (input[idx] == 0.F) { @@ -645,7 +670,7 @@ bool SampleNonZeroPlugin::verifyOutput(samplesCommon::BufferManager const& buffe { for (int32_t i = 0; i < count; ++i) { - auto const idx = output[2 * i] + mInputDims.d[0] * output[2 * i + 
1]; + auto const idx = output[2 * i] * mInputDims.d[1] + output[2 * i + 1]; covered[idx] = true; if (input[idx] == 0.F) { @@ -688,7 +713,7 @@ NonZeroParams initializeSampleParams(samplesCommon::Args const& args) params.outputTensorNames.push_back("Output0"); params.outputTensorNames.push_back("Output1"); params.fp16 = args.runInFp16; - params.rowMajor = args.rowMajor; + params.rowOrder = args.rowOrder; return params; } @@ -706,7 +731,7 @@ void printHelpInfo() "(data/samples/mnist/, data/mnist/)" << std::endl; std::cout << "--fp16 Run in FP16 mode." << std::endl; - std::cout << "--columnMajor Run plugin in column major output mode." << std::endl; + std::cout << "--columnOrder Run plugin in column major output mode." << std::endl; } int main(int argc, char** argv) diff --git a/samples/sampleOnnxMNIST/CMakeLists.txt b/samples/sampleOnnxMNIST/CMakeLists.txt index 23bd886b..6ed6da36 100644 --- a/samples/sampleOnnxMNIST/CMakeLists.txt +++ b/samples/sampleOnnxMNIST/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp b/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp index 35bfbd04..9dfd67c8 100644 --- a/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp +++ b/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleOnnxMnistCoordConvAC/CMakeLists.txt b/samples/sampleOnnxMnistCoordConvAC/CMakeLists.txt index b094cf08..bf8324d8 100644 --- a/samples/sampleOnnxMnistCoordConvAC/CMakeLists.txt +++ b/samples/sampleOnnxMnistCoordConvAC/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleOnnxMnistCoordConvAC/coord_conv.py b/samples/sampleOnnxMnistCoordConvAC/coord_conv.py index b2572ad5..6cd47eca 100644 --- a/samples/sampleOnnxMnistCoordConvAC/coord_conv.py +++ b/samples/sampleOnnxMnistCoordConvAC/coord_conv.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleOnnxMnistCoordConvAC/mnist_coord_conv_train.py b/samples/sampleOnnxMnistCoordConvAC/mnist_coord_conv_train.py index 8d0a9623..c7891d92 100644 --- a/samples/sampleOnnxMnistCoordConvAC/mnist_coord_conv_train.py +++ b/samples/sampleOnnxMnistCoordConvAC/mnist_coord_conv_train.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleOnnxMnistCoordConvAC/modify_onnx_ac.py b/samples/sampleOnnxMnistCoordConvAC/modify_onnx_ac.py index 0de8321d..8eb45bf9 100644 --- a/samples/sampleOnnxMnistCoordConvAC/modify_onnx_ac.py +++ b/samples/sampleOnnxMnistCoordConvAC/modify_onnx_ac.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp b/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp index 491186e5..02218820 100644 --- a/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp +++ b/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleProgressMonitor/CMakeLists.txt b/samples/sampleProgressMonitor/CMakeLists.txt index 582cbbf1..bf2cc4c2 100644 --- a/samples/sampleProgressMonitor/CMakeLists.txt +++ b/samples/sampleProgressMonitor/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/sampleProgressMonitor/sampleProgressMonitor.cpp b/samples/sampleProgressMonitor/sampleProgressMonitor.cpp index c9da0f23..393dc617 100644 --- a/samples/sampleProgressMonitor/sampleProgressMonitor.cpp +++ b/samples/sampleProgressMonitor/sampleProgressMonitor.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/trtexec/CMakeLists.txt b/samples/trtexec/CMakeLists.txt index 93b87ec5..c1e3f793 100644 --- a/samples/trtexec/CMakeLists.txt +++ b/samples/trtexec/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/trtexec/prn_utils.py b/samples/trtexec/prn_utils.py index 6d759238..6b0abf9f 100755 --- a/samples/trtexec/prn_utils.py +++ b/samples/trtexec/prn_utils.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/trtexec/profiler.py b/samples/trtexec/profiler.py index e251254b..0a34e69f 100755 --- a/samples/trtexec/profiler.py +++ b/samples/trtexec/profiler.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/trtexec/tracer.py b/samples/trtexec/tracer.py index 8a9b7a62..4b093d76 100755 --- a/samples/trtexec/tracer.py +++ b/samples/trtexec/tracer.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/trtexec/trtexec.cpp b/samples/trtexec/trtexec.cpp index ece19ed6..f3f72a1f 100644 --- a/samples/trtexec/trtexec.cpp +++ b/samples/trtexec/trtexec.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -52,11 +52,11 @@ using LibraryPtr = std::unique_ptr; #if !TRT_STATIC #if defined(_WIN32) -std::string const kNVINFER_PLUGIN_LIBNAME{"nvinfer_plugin.dll"}; -std::string const kNVINFER_LIBNAME{"nvinfer.dll"}; -std::string const kNVONNXPARSER_LIBNAME{"nvonnxparser.dll"}; -std::string const kNVINFER_LEAN_LIBNAME{"nvinfer_lean.dll"}; -std::string const kNVINFER_DISPATCH_LIBNAME{"nvinfer_dispatch.dll"}; +std::string const kNVINFER_PLUGIN_LIBNAME = std::string{"nvinfer_plugin_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; +std::string const kNVINFER_LIBNAME = std::string{"nvinfer_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; +std::string const kNVONNXPARSER_LIBNAME = std::string{"nvonnxparser_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; +std::string const kNVINFER_LEAN_LIBNAME = std::string{"nvinfer_lean_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; +std::string const kNVINFER_DISPATCH_LIBNAME = std::string{"nvinfer_dispatch_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; #else std::string const kNVINFER_PLUGIN_LIBNAME = std::string{"libnvinfer_plugin.so."} + std::to_string(NV_TENSORRT_MAJOR); std::string const kNVINFER_LIBNAME = std::string{"libnvinfer.so."} + std::to_string(NV_TENSORRT_MAJOR); diff --git a/samples/utils/fileLock.cpp b/samples/utils/fileLock.cpp index 0b45c2df..e155c0bd 100644 --- a/samples/utils/fileLock.cpp +++ b/samples/utils/fileLock.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/utils/fileLock.h b/samples/utils/fileLock.h index 628da207..d0f64a5b 100644 --- a/samples/utils/fileLock.h +++ b/samples/utils/fileLock.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/utils/timingCache.cpp b/samples/utils/timingCache.cpp index 1ddf083d..aec9674e 100644 --- a/samples/utils/timingCache.cpp +++ b/samples/utils/timingCache.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/samples/utils/timingCache.h b/samples/utils/timingCache.h index fff4a482..c8ffbd97 100644 --- a/samples/utils/timingCache.h +++ b/samples/utils/timingCache.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/convert_te_onnx_to_trt_onnx.py b/scripts/convert_te_onnx_to_trt_onnx.py index 6969aa2e..e82f82b2 100644 --- a/scripts/convert_te_onnx_to_trt_onnx.py +++ b/scripts/convert_te_onnx_to_trt_onnx.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/copyright-scan.py b/scripts/copyright-scan.py index cc51c8e1..45b3bbe9 100644 --- a/scripts/copyright-scan.py +++ b/scripts/copyright-scan.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/stubify.sh b/scripts/stubify.sh index aad43500..788d4672 100755 --- a/scripts/stubify.sh +++ b/scripts/stubify.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/third_party/ieee/half.h b/third_party/ieee/half.h index c1f20f16..c4df4b67 100644 --- a/third_party/ieee/half.h +++ b/third_party/ieee/half.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/third_party/protobuf.cmake b/third_party/protobuf.cmake index 6b1fbd43..6b3d87ff 100644 --- a/third_party/protobuf.cmake +++ b/third_party/protobuf.cmake @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -48,11 +48,12 @@ macro(configure_protobuf VERSION) set(Protobuf_BIN_DIR "${CMAKE_BINARY_DIR}/${Protobuf_TARGET}/bin") find_file (CENTOS_FOUND centos-release PATHS /etc) - if (CENTOS_FOUND) + find_file (ROCKY_FOUND rocky-release PATHS /etc) + if (CENTOS_FOUND OR ROCKY_FOUND) set(Protobuf_LIB_DIR "${CMAKE_BINARY_DIR}/${Protobuf_TARGET}/lib64") - else (CENTOS_FOUND) + else (CENTOS_FOUND OR ROCKY_FOUND) set(Protobuf_LIB_DIR "${CMAKE_BINARY_DIR}/${Protobuf_TARGET}/lib") - endif (CENTOS_FOUND) + endif (CENTOS_FOUND OR ROCKY_FOUND) set(Protobuf_INCLUDE_DIR "${CMAKE_BINARY_DIR}/${Protobuf_TARGET}/include") set(Protobuf_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/${Protobuf_TARGET}/include") set(Protobuf_PROTOC_EXECUTABLE "${Protobuf_BIN_DIR}/protoc") diff --git a/tools/Polygraphy/CHANGELOG.md b/tools/Polygraphy/CHANGELOG.md index a04b5ca5..c3fb0151 100644 --- a/tools/Polygraphy/CHANGELOG.md +++ b/tools/Polygraphy/CHANGELOG.md @@ -3,6 +3,15 @@ Dates are in YYYY-MM-DD format. +## v0.49.10 (2024-04-19) +### Added +- Added an `EngineFromPath` loader to deserialize an engine directly from disk. This will save CPU memory when weight streaming is enabled. + +### Fixed +- Fixed a memory leak in `TrtRunner` caused by creating a new output allocator per inference. +- Fixed a bug where the `Calibrator` would not force non-index inputs to FP32; this is required by TensorRT. + + ## v0.49.9 (2024-03-19) ### Added - Added `run_opts` argument to `tools.main` to allow calling polygraphy tools from within other Python scripts. diff --git a/tools/Polygraphy/Makefile b/tools/Polygraphy/Makefile index 74b701bc..841025c2 100644 --- a/tools/Polygraphy/Makefile +++ b/tools/Polygraphy/Makefile @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/docs/conf.py b/tools/Polygraphy/docs/conf.py index 58c88f92..fff44c97 100644 --- a/tools/Polygraphy/docs/conf.py +++ b/tools/Polygraphy/docs/conf.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,7 +31,15 @@ ] # Want to be able to generate docs with no dependencies installed -autodoc_mock_imports = ["tensorrt", "onnx", "numpy", "tensorflow", "onnx_graphsurgeon", "onnxruntime", "tf2onnx"] +autodoc_mock_imports = [ + "tensorrt", + "onnx", + "numpy", + "tensorflow", + "onnx_graphsurgeon", + "onnxruntime", + "tf2onnx", +] autodoc_default_options = { @@ -56,7 +64,7 @@ # General information about the project. project = "Polygraphy" -copyright = "2022, NVIDIA" +copyright = "2024, NVIDIA" author = "NVIDIA" version = polygraphy.__version__ @@ -89,7 +97,10 @@ # Unlimited depth sidebar. html_theme_options = {"navigation_depth": -1} -html_sidebars = {"**": ["globaltoc.html", "relations.html", "sourcelink.html", "searchbox.html"]} +html_sidebars = { + "**": ["globaltoc.html", "relations.html", "sourcelink.html", "searchbox.html"] +} + # Allows us to override the default page width in the Sphinx theme. 
def setup(app): diff --git a/tools/Polygraphy/docs/requirements.txt b/tools/Polygraphy/docs/requirements.txt index 7000a6e4..aeb43aed 100644 --- a/tools/Polygraphy/docs/requirements.txt +++ b/tools/Polygraphy/docs/requirements.txt @@ -1,3 +1,3 @@ -sphinx-rtd-theme==1.0.0 -sphinx==4.4.0 -docutils==0.17.1 +sphinx-rtd-theme==2.0.0 +sphinx==7.2.6 +docutils==0.20.1 diff --git a/tools/Polygraphy/examples/api/00_inference_with_tensorrt/build_and_run.py b/tools/Polygraphy/examples/api/00_inference_with_tensorrt/build_and_run.py index 18667f22..7ca9f3a4 100644 --- a/tools/Polygraphy/examples/api/00_inference_with_tensorrt/build_and_run.py +++ b/tools/Polygraphy/examples/api/00_inference_with_tensorrt/build_and_run.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,7 +21,13 @@ starting from an ONNX identity model. """ import numpy as np -from polygraphy.backend.trt import CreateConfig, EngineFromNetwork, NetworkFromOnnxPath, SaveEngine, TrtRunner +from polygraphy.backend.trt import ( + CreateConfig, + EngineFromNetwork, + NetworkFromOnnxPath, + SaveEngine, + TrtRunner, +) def main(): diff --git a/tools/Polygraphy/examples/api/00_inference_with_tensorrt/load_and_run.py b/tools/Polygraphy/examples/api/00_inference_with_tensorrt/load_and_run.py index 3ba4c0db..bc05b45d 100644 --- a/tools/Polygraphy/examples/api/00_inference_with_tensorrt/load_and_run.py +++ b/tools/Polygraphy/examples/api/00_inference_with_tensorrt/load_and_run.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/examples/api/01_comparing_frameworks/example.py b/tools/Polygraphy/examples/api/01_comparing_frameworks/example.py index 2503cb03..5eee4f60 100644 --- a/tools/Polygraphy/examples/api/01_comparing_frameworks/example.py +++ b/tools/Polygraphy/examples/api/01_comparing_frameworks/example.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -51,7 +51,11 @@ def main(): # TIP: The `compare_func` parameter can be used to control how outputs are compared (see API reference for details). # The default comparison function is created by `CompareFunc.simple()`, but we can construct it # explicitly if we want to change the default parameters, such as tolerance. - assert bool(Comparator.compare_accuracy(run_results, compare_func=CompareFunc.simple(atol=1e-8))) + assert bool( + Comparator.compare_accuracy( + run_results, compare_func=CompareFunc.simple(atol=1e-8) + ) + ) # We can use `RunResults.save()` method to save the inference results to a JSON file. # This can be useful if you want to generate and compare results separately. diff --git a/tools/Polygraphy/examples/api/02_validating_on_a_dataset/example.py b/tools/Polygraphy/examples/api/02_validating_on_a_dataset/example.py index f07bc26f..2c2f262b 100644 --- a/tools/Polygraphy/examples/api/02_validating_on_a_dataset/example.py +++ b/tools/Polygraphy/examples/api/02_validating_on_a_dataset/example.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,7 +40,7 @@ def main(): build_engine = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx")) with TrtRunner(build_engine) as runner: - for (data, golden) in zip(REAL_DATASET, EXPECTED_OUTPUTS): + for data, golden in zip(REAL_DATASET, EXPECTED_OUTPUTS): # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls. # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`. outputs = runner.infer(feed_dict={"x": data}) diff --git a/tools/Polygraphy/examples/api/03_interoperating_with_tensorrt/example.py b/tools/Polygraphy/examples/api/03_interoperating_with_tensorrt/example.py index 9658dba0..d3b388a5 100644 --- a/tools/Polygraphy/examples/api/03_interoperating_with_tensorrt/example.py +++ b/tools/Polygraphy/examples/api/03_interoperating_with_tensorrt/example.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,7 +24,12 @@ import numpy as np import tensorrt as trt from polygraphy import func -from polygraphy.backend.trt import CreateConfig, EngineFromNetwork, NetworkFromOnnxPath, TrtRunner +from polygraphy.backend.trt import ( + CreateConfig, + EngineFromNetwork, + NetworkFromOnnxPath, + TrtRunner, +) # TIP: The immediately evaluated functional API makes it very easy to interoperate @@ -33,6 +38,7 @@ # We can use the `extend` decorator to easily extend lazy loaders provided by Polygraphy # The parameters our decorated function takes should match the return values of the loader we are extending. + # For `NetworkFromOnnxPath`, we can see from the API documentation that it returns a TensorRT # builder, network and parser. That is what our function will receive. @func.extend(NetworkFromOnnxPath("identity.onnx")) diff --git a/tools/Polygraphy/examples/api/04_int8_calibration_in_tensorrt/example.py b/tools/Polygraphy/examples/api/04_int8_calibration_in_tensorrt/example.py index 168bf217..6260b8ef 100644 --- a/tools/Polygraphy/examples/api/04_int8_calibration_in_tensorrt/example.py +++ b/tools/Polygraphy/examples/api/04_int8_calibration_in_tensorrt/example.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,7 +21,13 @@ to calibrate a TensorRT engine to run in INT8 precision. 
""" import numpy as np -from polygraphy.backend.trt import Calibrator, CreateConfig, EngineFromNetwork, NetworkFromOnnxPath, TrtRunner +from polygraphy.backend.trt import ( + Calibrator, + CreateConfig, + EngineFromNetwork, + NetworkFromOnnxPath, + TrtRunner, +) from polygraphy.logger import G_LOGGER @@ -46,7 +52,8 @@ def main(): # We must enable int8 mode in addition to providing the calibrator. build_engine = EngineFromNetwork( - NetworkFromOnnxPath("identity.onnx"), config=CreateConfig(int8=True, calibrator=calibrator) + NetworkFromOnnxPath("identity.onnx"), + config=CreateConfig(int8=True, calibrator=calibrator), ) # When we activate our runner, it will calibrate and build the engine. If we want to diff --git a/tools/Polygraphy/examples/api/05_using_tensorrt_network_api/example.py b/tools/Polygraphy/examples/api/05_using_tensorrt_network_api/example.py index 91a45235..13f6a95d 100644 --- a/tools/Polygraphy/examples/api/05_using_tensorrt_network_api/example.py +++ b/tools/Polygraphy/examples/api/05_using_tensorrt_network_api/example.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -37,8 +37,12 @@ def create_network(builder, network): # This network will add 1 to the input tensor. 
inp = network.add_input(name=INPUT_NAME, shape=INPUT_SHAPE, dtype=trt.float32) - ones = network.add_constant(shape=INPUT_SHAPE, weights=np.ones(shape=INPUT_SHAPE, dtype=np.float32)).get_output(0) - add = network.add_elementwise(inp, ones, op=trt.ElementWiseOperation.SUM).get_output(0) + ones = network.add_constant( + shape=INPUT_SHAPE, weights=np.ones(shape=INPUT_SHAPE, dtype=np.float32) + ).get_output(0) + add = network.add_elementwise( + inp, ones, op=trt.ElementWiseOperation.SUM + ).get_output(0) add.name = OUTPUT_NAME network.mark_output(add) @@ -53,7 +57,9 @@ def main(): build_engine = EngineFromNetwork(create_network) with TrtRunner(build_engine) as runner: - feed_dict = {INPUT_NAME: np.random.random_sample(INPUT_SHAPE).astype(np.float32)} + feed_dict = { + INPUT_NAME: np.random.random_sample(INPUT_SHAPE).astype(np.float32) + } # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls. # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`. diff --git a/tools/Polygraphy/examples/api/06_immediate_eval_api/build_and_run.py b/tools/Polygraphy/examples/api/06_immediate_eval_api/build_and_run.py index b0cf567e..b1cffd97 100644 --- a/tools/Polygraphy/examples/api/06_immediate_eval_api/build_and_run.py +++ b/tools/Polygraphy/examples/api/06_immediate_eval_api/build_and_run.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,7 +23,13 @@ save the engine, and finally run inference. 
""" import numpy as np -from polygraphy.backend.trt import TrtRunner, create_config, engine_from_network, network_from_onnx_path, save_engine +from polygraphy.backend.trt import ( + TrtRunner, + create_config, + engine_from_network, + network_from_onnx_path, + save_engine, +) def main(): diff --git a/tools/Polygraphy/examples/api/06_immediate_eval_api/load_and_run.py b/tools/Polygraphy/examples/api/06_immediate_eval_api/load_and_run.py index 6219ae06..1cabcfea 100644 --- a/tools/Polygraphy/examples/api/06_immediate_eval_api/load_and_run.py +++ b/tools/Polygraphy/examples/api/06_immediate_eval_api/load_and_run.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/examples/api/07_tensorrt_and_dynamic_shapes/example.py b/tools/Polygraphy/examples/api/07_tensorrt_and_dynamic_shapes/example.py index 771bbc53..7842e82d 100644 --- a/tools/Polygraphy/examples/api/07_tensorrt_and_dynamic_shapes/example.py +++ b/tools/Polygraphy/examples/api/07_tensorrt_and_dynamic_shapes/example.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -50,13 +50,16 @@ def main(): # The dynamic batching case. We use `4` for the opt batch size since that's our most common case. Profile().add("X", min=(1, 3, 28, 28), opt=(4, 3, 28, 28), max=(32, 3, 28, 28)), # The offline case. For best performance, min == opt == max. 
- Profile().add("X", min=(128, 3, 28, 28), opt=(128, 3, 28, 28), max=(128, 3, 28, 28)), + Profile().add( + "X", min=(128, 3, 28, 28), opt=(128, 3, 28, 28), max=(128, 3, 28, 28) + ), ] # See examples/api/06_immediate_eval_api for details on immediately evaluated functional loaders like `engine_from_network`. # Note that we can freely mix lazy and immediately-evaluated loaders. engine = engine_from_network( - network_from_onnx_path("dynamic_identity.onnx"), config=CreateConfig(profiles=profiles) + network_from_onnx_path("dynamic_identity.onnx"), + config=CreateConfig(profiles=profiles), ) # We'll save the engine so that we can inspect it with `inspect model`. @@ -134,9 +137,13 @@ def main(): # # Alternatively, we could have used the `optimization_profile` parameter (see above). # - offline.set_profile(2) # Use the third profile, which is intended for the offline case. + offline.set_profile( + 2 + ) # Use the third profile, which is intended for the offline case. - large_offline_batch = np.repeat(input_img, 128, axis=0) # Shape: (128, 3, 28, 28) + large_offline_batch = np.repeat( + input_img, 128, axis=0 + ) # Shape: (128, 3, 28, 28) outputs = offline.infer({"X": large_offline_batch}) assert np.array_equal(outputs["Y"], large_offline_batch) diff --git a/tools/Polygraphy/examples/api/08_working_with_run_results_and_saved_inputs_manually/example.py b/tools/Polygraphy/examples/api/08_working_with_run_results_and_saved_inputs_manually/example.py index 6c1f3073..3955928a 100644 --- a/tools/Polygraphy/examples/api/08_working_with_run_results_and_saved_inputs_manually/example.py +++ b/tools/Polygraphy/examples/api/08_working_with_run_results_and_saved_inputs_manually/example.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/examples/api/09_working_with_pytorch_tensors/example.py b/tools/Polygraphy/examples/api/09_working_with_pytorch_tensors/example.py index 3878f32b..a1e8df83 100644 --- a/tools/Polygraphy/examples/api/09_working_with_pytorch_tensors/example.py +++ b/tools/Polygraphy/examples/api/09_working_with_pytorch_tensors/example.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,7 +21,13 @@ import torch -from polygraphy.backend.trt import Calibrator, CreateConfig, TrtRunner, engine_from_network, network_from_onnx_path +from polygraphy.backend.trt import ( + Calibrator, + CreateConfig, + TrtRunner, + engine_from_network, + network_from_onnx_path, +) # If your PyTorch installation has GPU support, then we'll allocate the tensors # directly in GPU memory. 
This will mean that the calibrator and runner can skip the @@ -38,7 +44,8 @@ def main(): calibrator = Calibrator(data_loader=calib_data()) engine = engine_from_network( - network_from_onnx_path("identity.onnx"), config=CreateConfig(int8=True, calibrator=calibrator) + network_from_onnx_path("identity.onnx"), + config=CreateConfig(int8=True, calibrator=calibrator), ) with TrtRunner(engine) as runner: diff --git a/tools/Polygraphy/examples/cli/convert/01_int8_calibration_in_tensorrt/data_loader.py b/tools/Polygraphy/examples/cli/convert/01_int8_calibration_in_tensorrt/data_loader.py index ff8c45e8..06500ad9 100644 --- a/tools/Polygraphy/examples/cli/convert/01_int8_calibration_in_tensorrt/data_loader.py +++ b/tools/Polygraphy/examples/cli/convert/01_int8_calibration_in_tensorrt/data_loader.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -28,4 +28,6 @@ def load_data(): for _ in range(5): - yield {"x": np.ones(shape=INPUT_SHAPE, dtype=np.float32)} # Still totally real data + yield { + "x": np.ones(shape=INPUT_SHAPE, dtype=np.float32) + } # Still totally real data diff --git a/tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/README.md b/tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/README.md index d0679707..0fe8d60a 100644 --- a/tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/README.md +++ b/tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/README.md @@ -39,8 +39,8 @@ The original and the replaced model can be compared to check if they behave the 1. 
Find and save matches of toyPlugin in the example network: ```bash - polygraphy plugin match graph_with_subgraph_matching_toy_plugin.onnx \ - --plugin-dir ./plugins + polygraphy plugin match toy_subgraph.onnx \ + --plugin-dir ./plugins -o config.yaml ``` @@ -75,7 +75,7 @@ The original and the replaced model can be compared to check if they behave the 2. **[Optional]** List matches of toyPlugin in the example network, without saving config.yaml: ```bash - polygraphy plugin list graph_with_subgraph_matching_toy_plugin.onnx \ + polygraphy plugin list toy_subgraph.onnx \ --plugin-dir ./plugins ``` @@ -106,17 +106,16 @@ The `plugin replace` subtool replaces subgraphs in an onnx model with plugins 3. Replace parts of the example network with toyPlugin: ```bash - polygraphy plugin replace graph_with_subgraph_matching_toy_plugin.onnx \ - --plugin-dir ./plugins \ - -o replaced.onnx + polygraphy plugin replace toy_subgraph.onnx \ + --plugin-dir ./plugins --config config.yaml -o replaced.onnx ``` This will display something like: ``` - [I] Loading model: /Users/pkisfaludi/Documents/git/Polygraphy/examples/cli/plugin/03_replace_subgraph_with_a_plugin/graph_with_subgraph_matching_toy_plugin.onnx + [I] Loading model: /Users/pkisfaludi/Documents/git/Polygraphy/examples/cli/plugin/03_replace_subgraph_with_a_plugin/toy_subgraph.onnx ``` The result file is replaced.onnx, where a subgraph in the example network is replaced by toyPlugin - \ No newline at end of file + diff --git a/tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/plugins/toyPlugin/__init__.py b/tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/plugins/toyPlugin/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/plugins/toyPlugin/pattern.py b/tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/plugins/toyPlugin/pattern.py index 6cf600ba..00c52974 100644 --- 
a/tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/plugins/toyPlugin/pattern.py +++ b/tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/plugins/toyPlugin/pattern.py @@ -1,7 +1,8 @@ from polygraphy import mod gs = mod.lazy_import("onnx_graphsurgeon>=0.5.0") +from typing import List,Dict -def get_plugin_pattern() -> gs.GraphPattern: +def get_plugin_pattern(): """ Toy plugin pattern: A B @@ -23,9 +24,25 @@ def get_plugin_pattern() -> gs.GraphPattern: return pattern -def get_plugin_attributes(sg) -> dict: - """ - example plugin attribute mapping, where the plugin has attribute ToyX, which gets its value from C.x * 2 - """ - return {"ToyX": int(sg.get("Cnode").attrs["x"]) * 2} +def get_matching_subgraphs(graph) -> List[Dict[str,str]]: + gp = get_plugin_pattern() + matches = gp.match_all(graph) + ans = [] + for m in matches: + # save the input and output tensor names of the matching subgraph(s) + input_tensors = list(set([ip_tensor.name for ip_tensor in m.inputs])) + output_tensors = list(set([op_tensor.name for op_tensor in m.outputs])) + + attrs = {"ToyX": int(m.get("Cnode").attrs["x"]) * 2} + ioa = { + 'inputs':input_tensors, + 'outputs':output_tensors, + 'attributes':attrs + } + ans.append(ioa) + return ans +def get_plugin_metadata() -> Dict[str,str]: + return {'name':'toyPlugin', + 'op':'CustomToyPlugin', + } diff --git a/tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/graph_with_subgraph_matching_toy_plugin.onnx b/tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/toy_subgraph.onnx similarity index 100% rename from tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/graph_with_subgraph_matching_toy_plugin.onnx rename to tools/Polygraphy/examples/cli/plugin/01_match_and_replace_plugin/toy_subgraph.onnx diff --git a/tools/Polygraphy/examples/cli/run/04_defining_a_tensorrt_network_or_config_manually/create_config.py 
b/tools/Polygraphy/examples/cli/run/04_defining_a_tensorrt_network_or_config_manually/create_config.py index eeec81db..29fb2874 100644 --- a/tools/Polygraphy/examples/cli/run/04_defining_a_tensorrt_network_or_config_manually/create_config.py +++ b/tools/Polygraphy/examples/cli/run/04_defining_a_tensorrt_network_or_config_manually/create_config.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/examples/cli/run/04_defining_a_tensorrt_network_or_config_manually/define_network.py b/tools/Polygraphy/examples/cli/run/04_defining_a_tensorrt_network_or_config_manually/define_network.py index 10e2d9e5..d5548261 100755 --- a/tools/Polygraphy/examples/cli/run/04_defining_a_tensorrt_network_or_config_manually/define_network.py +++ b/tools/Polygraphy/examples/cli/run/04_defining_a_tensorrt_network_or_config_manually/define_network.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,6 +23,7 @@ parse_onnx = NetworkFromOnnxPath("identity.onnx") + # If we define a function called `load_network`, polygraphy can # use it directly in place of using a model file. 
# diff --git a/tools/Polygraphy/examples/cli/run/05_comparing_with_custom_input_data/data_loader.py b/tools/Polygraphy/examples/cli/run/05_comparing_with_custom_input_data/data_loader.py index 1d025ef5..bde44639 100644 --- a/tools/Polygraphy/examples/cli/run/05_comparing_with_custom_input_data/data_loader.py +++ b/tools/Polygraphy/examples/cli/run/05_comparing_with_custom_input_data/data_loader.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,10 +31,13 @@ INPUT_SHAPE = (1, 2, 28, 28) + # Option 1: Define a function that will yield feed_dicts (i.e. Dict[str, np.ndarray]) def load_data(): for _ in range(5): - yield {"x": np.ones(shape=INPUT_SHAPE, dtype=np.float32)} # Still totally real data + yield { + "x": np.ones(shape=INPUT_SHAPE, dtype=np.float32) + } # Still totally real data # Option 2: Create a JSON file containing the input data using the `save_json()` helper. diff --git a/tools/Polygraphy/examples/cli/run/06_comparing_with_custom_output_data/generate_data.py b/tools/Polygraphy/examples/cli/run/06_comparing_with_custom_output_data/generate_data.py index df44eaa5..9b4aca50 100644 --- a/tools/Polygraphy/examples/cli/run/06_comparing_with_custom_output_data/generate_data.py +++ b/tools/Polygraphy/examples/cli/run/06_comparing_with_custom_output_data/generate_data.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/examples/cli/run/08_adding_precision_constraints/add_constraints.py b/tools/Polygraphy/examples/cli/run/08_adding_precision_constraints/add_constraints.py index fac9de5f..7b993305 100755 --- a/tools/Polygraphy/examples/cli/run/08_adding_precision_constraints/add_constraints.py +++ b/tools/Polygraphy/examples/cli/run/08_adding_precision_constraints/add_constraints.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/examples/cli/run/08_adding_precision_constraints/constrained_network.py b/tools/Polygraphy/examples/cli/run/08_adding_precision_constraints/constrained_network.py index 2a420031..3a56a440 100755 --- a/tools/Polygraphy/examples/cli/run/08_adding_precision_constraints/constrained_network.py +++ b/tools/Polygraphy/examples/cli/run/08_adding_precision_constraints/constrained_network.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/__init__.py b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/__init__.py index fb3b870b..dadebabb 100644 --- a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/__init__.py +++ b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/__init__.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/args/__init__.py b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/args/__init__.py index f48a3bc8..f82a9b42 100644 --- a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/args/__init__.py +++ b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/args/__init__.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/args/loader.py b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/args/loader.py index f7a7b6f5..5a3ffba7 100644 --- a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/args/loader.py +++ b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/args/loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -96,14 +96,20 @@ def add_to_script_impl(self, script): # First, import the loader from Polygraphy: script.add_import(imports=["GsFromOnnx"], frm="polygraphy.backend.onnx") # Next, invoke the loader with arguments (in this case, the ONNX model loader name), and add it to the script. - loader_name = script.add_loader(make_invocable("GsFromOnnx", loader_name), loader_id="gs_from_onnx") + loader_name = script.add_loader( + make_invocable("GsFromOnnx", loader_name), loader_id="gs_from_onnx" + ) # Finally, add the ReplaceReshapeArgs loader. # Unlike the Polygraphy loaders, we'll need to import our loader from the extension module. - script.add_import(imports=["ReplaceReshapes"], frm="polygraphy_reshape_destroyer.backend") + script.add_import( + imports=["ReplaceReshapes"], frm="polygraphy_reshape_destroyer.backend" + ) # Add the loader and return the ID so that it can be used by subsequent loaders or runners. 
# NOTE: We can provide additional positional and keyword arguments to `make_invocable` to pass them on to the loader. return script.add_loader( - make_invocable("ReplaceReshapes", loader_name, rename_nodes=self.rename_nodes), + make_invocable( + "ReplaceReshapes", loader_name, rename_nodes=self.rename_nodes + ), loader_id="replace_reshapes", ) diff --git a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/args/runner.py b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/args/runner.py index 1de79035..825bd021 100644 --- a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/args/runner.py +++ b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/args/runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -71,10 +71,14 @@ def add_to_script_impl(self, script): loader_name = self.arg_groups[ReplaceReshapeArgs].add_to_script(script) # Next, we'll add an import for our runner. - script.add_import(imports=["IdentityOnlyRunner"], frm="polygraphy_reshape_destroyer.backend") + script.add_import( + imports=["IdentityOnlyRunner"], frm="polygraphy_reshape_destroyer.backend" + ) # Lastly, we can add our runner using the `Script.add_runner()` API. # Like in the loader implementation, additional arguments can be provided directly to `make_invocable`. 
- script.add_runner(make_invocable("IdentityOnlyRunner", loader_name, speed=self.speed)) + script.add_runner( + make_invocable("IdentityOnlyRunner", loader_name, speed=self.speed) + ) # NOTE: Unlike the `add_to_script_impl` method of regular `BaseArgs`, that of `BaseRunnerArgs` # is not expected to return anything. diff --git a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/backend/__init__.py b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/backend/__init__.py index 5e210e27..99c340ba 100644 --- a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/backend/__init__.py +++ b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/backend/__init__.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/backend/loader.py b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/backend/loader.py index 13d8667a..615fae80 100644 --- a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/backend/loader.py +++ b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/backend/loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -61,7 +61,9 @@ class ReplaceReshapes(BaseLoader): Functor that replaces no-op Reshape nodes in an ONNX-GraphSurgeon graph with Identity. """ - def __init__(self, graph: Union[gs.Graph, Callable[[], gs.Graph]], rename_nodes: bool = None): + def __init__( + self, graph: Union[gs.Graph, Callable[[], gs.Graph]], rename_nodes: bool = None + ): """ Replaces no-op Reshape nodes in an ONNX-GraphSurgeon graph with Identity. diff --git a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/backend/runner.py b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/backend/runner.py index b5c2ed75..bb6a280e 100644 --- a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/backend/runner.py +++ b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/backend/runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -59,7 +59,9 @@ def __init__(self, graph, name=None, speed: str = None): VALID_SPEEDS = ["slow", "medium", "fast"] if self.speed not in VALID_SPEEDS: # Like Polygraphy, extension modules should use `G_LOGGER.critical()` for any unrecoverable errors. - G_LOGGER.critical(f"Invalid speed: {self.speed}. Note: Valid speeds are: {VALID_SPEEDS}") + G_LOGGER.critical( + f"Invalid speed: {self.speed}. 
Note: Valid speeds are: {VALID_SPEEDS}" + ) @util.check_called_by("activate") def activate_impl(self): diff --git a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/export.py b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/export.py index ec3b0349..92de511b 100644 --- a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/export.py +++ b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/polygraphy_reshape_destroyer/export.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,6 +22,7 @@ from polygraphy_reshape_destroyer.args import ReplaceReshapeArgs, IdentityOnlyRunnerArgs + # The entry point is expected to take no arguments and return a list of argument group instances. # # NOTE: Argument groups will be parsed in the order in which they are provided, diff --git a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/setup.py b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/setup.py index a8290580..3150d1a3 100644 --- a/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/setup.py +++ b/tools/Polygraphy/examples/dev/02_extending_polygraphy_run/extension_module/setup.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/__init__.py b/tools/Polygraphy/polygraphy/__init__.py index 41215171..2feb03ed 100644 --- a/tools/Polygraphy/polygraphy/__init__.py +++ b/tools/Polygraphy/polygraphy/__init__.py @@ -1,3 +1,3 @@ import polygraphy.config -__version__ = "0.49.9" +__version__ = "0.49.10" diff --git a/tools/Polygraphy/polygraphy/backend/base/loader.py b/tools/Polygraphy/polygraphy/backend/base/loader.py index 481cbfe1..ab9f4330 100644 --- a/tools/Polygraphy/polygraphy/backend/base/loader.py +++ b/tools/Polygraphy/polygraphy/backend/base/loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/backend/base/runner.py b/tools/Polygraphy/polygraphy/backend/base/runner.py index ae033e61..7f9c771b 100644 --- a/tools/Polygraphy/polygraphy/backend/base/runner.py +++ b/tools/Polygraphy/polygraphy/backend/base/runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -126,7 +126,9 @@ def get_input_metadata(self, use_numpy_dtypes=None): TensorMetadata: Input names, shapes, and data types. 
""" if not self.is_active: - G_LOGGER.critical(f"{self.name:35} | Must be activated prior to calling get_input_metadata()") + G_LOGGER.critical( + f"{self.name:35} | Must be activated prior to calling get_input_metadata()" + ) use_numpy_dtypes = util.default(use_numpy_dtypes, True) @@ -180,11 +182,16 @@ def infer(self, feed_dict, check_inputs=True, *args, **kwargs): outputs from multiple inferences, you should make a copy with ``copy.deepcopy(outputs)``. """ if not self.is_active: - G_LOGGER.critical(f"{self.name:35} | Must be activated prior to calling infer()") + G_LOGGER.critical( + f"{self.name:35} | Must be activated prior to calling infer()" + ) if check_inputs: input_metadata = self.get_input_metadata(use_numpy_dtypes=False) - G_LOGGER.verbose(f"{self.name:35} | Input metadata is: {input_metadata}", mode=LogMode.ONCE) + G_LOGGER.verbose( + f"{self.name:35} | Input metadata is: {input_metadata}", + mode=LogMode.ONCE, + ) base_util.check_inputs(feed_dict, input_metadata) return self.infer_impl(feed_dict, *args, **kwargs) @@ -246,4 +253,6 @@ def deactivate(self): def __del__(self): if self.is_active: # __del__ is not guaranteed to be called, but when it is, this could be a useful warning. - print(f"[W] {self.name:35} | Was activated but never deactivated. This could cause a memory leak!") + print( + f"[W] {self.name:35} | Was activated but never deactivated. This could cause a memory leak!" + ) diff --git a/tools/Polygraphy/polygraphy/backend/base/util.py b/tools/Polygraphy/polygraphy/backend/base/util.py index 03821811..0dce8dd1 100644 --- a/tools/Polygraphy/polygraphy/backend/base/util.py +++ b/tools/Polygraphy/polygraphy/backend/base/util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,7 +29,9 @@ def check_inputs(feed_dict, input_metadata): input_metadata (TensorMetadata): The expected input metadata. """ - util.check_sequence_contains(feed_dict.keys(), input_metadata.keys(), name="input data", items_name="inputs") + util.check_sequence_contains( + feed_dict.keys(), input_metadata.keys(), name="input data", items_name="inputs" + ) for name, inp in feed_dict.items(): meta = input_metadata[name] diff --git a/tools/Polygraphy/polygraphy/backend/common/loader.py b/tools/Polygraphy/polygraphy/backend/common/loader.py index 38894bf6..c246dfa4 100644 --- a/tools/Polygraphy/polygraphy/backend/common/loader.py +++ b/tools/Polygraphy/polygraphy/backend/common/loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/backend/onnx/loader.py b/tools/Polygraphy/polygraphy/backend/onnx/loader.py index 6992ad00..44a5897d 100644 --- a/tools/Polygraphy/polygraphy/backend/onnx/loader.py +++ b/tools/Polygraphy/polygraphy/backend/onnx/loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -28,14 +28,18 @@ np = mod.lazy_import("numpy") onnx = mod.lazy_import("onnx>=1.8.1") onnxrt = mod.lazy_import("onnxruntime>=1.10.0") -onnxmltools = mod.lazy_import("onnxmltools==1.11.1", requires=["onnxconverter_common==1.12.2"]) +onnxmltools = mod.lazy_import( + "onnxmltools==1.11.1", requires=["onnxconverter_common==1.12.2"] +) tf = mod.lazy_import("tensorflow<2.0") tf2onnx = mod.lazy_import("tf2onnx") tf_util = mod.lazy_import("polygraphy.backend.tf.util", log=False) gs = mod.lazy_import("onnx_graphsurgeon>=0.3.27") # ONNX-RT's shape inference also requires "sympy", but it is not reported as a dependency, # so we work around it by checking for it manually. -onnxrt_symbolic_shape_inference = mod.lazy_import("onnxruntime.tools.symbolic_shape_infer>=1.10.0", requires=["sympy"]) +onnxrt_symbolic_shape_inference = mod.lazy_import( + "onnxruntime.tools.symbolic_shape_infer>=1.10.0", requires=["sympy"] +) LARGE_MODEL_THRESHOLD = 512 << 20 # 512 MiB PROTOBUF_THRESHOLD = 2e9 @@ -151,7 +155,9 @@ def call_impl(self): """ G_LOGGER.info(f"Loading model: {self.path}") # If external_data_dir is not None, we'll load external data ourselves - auto_load_ext_data = self.external_data_dir is None and not self.ignore_external_data + auto_load_ext_data = ( + self.external_data_dir is None and not self.ignore_external_data + ) try: model = onnx.load(self.path, load_external_data=auto_load_ext_data) except FileNotFoundError: @@ -165,7 +171,9 @@ def call_impl(self): if self.external_data_dir is not None: G_LOGGER.verbose(f"Loading external data from: {self.external_data_dir}") - onnx.external_data_helper.load_external_data_for_model(model, self.external_data_dir) + onnx.external_data_helper.load_external_data_for_model( + model, self.external_data_dir + ) return model @@ -202,13 +210,20 @@ def call_impl(self): graphdef = graph.as_graph_def() if self.optimize: - graphdef = 
tf2onnx.tfonnx.tf_optimize(input_names, output_names, graph.as_graph_def()) + graphdef = tf2onnx.tfonnx.tf_optimize( + input_names, output_names, graph.as_graph_def() + ) - with tf.Graph().as_default() as graph, tf.compat.v1.Session(graph=graph) as sess: + with tf.Graph().as_default() as graph, tf.compat.v1.Session( + graph=graph + ) as sess: tf.import_graph_def(graphdef, name="") onnx_graph = tf2onnx.tfonnx.process_tf_graph( - graph, input_names=input_names, output_names=output_names, opset=self.opset + graph, + input_names=input_names, + output_names=output_names, + opset=self.opset, ) if self.optimize: onnx_graph = tf2onnx.optimizer.optimize_graph(onnx_graph) @@ -386,14 +401,18 @@ def run_const_fold_pass(model): del model graph.fold_constants( - fold_shapes=self.fold_shapes, partitioning=self.partitioning, size_threshold=self.size_threshold + fold_shapes=self.fold_shapes, + partitioning=self.partitioning, + size_threshold=self.size_threshold, ) model = gs.export_onnx(graph.cleanup(), do_type_check=False) del graph if self.fold_shapes and self.do_shape_inference: - model = infer_shapes(model, allow_onnxruntime=self.allow_onnxruntime_shape_inference) + model = infer_shapes( + model, allow_onnxruntime=self.allow_onnxruntime_shape_inference + ) return model # Need to manually trigger the autoinstall this since it's used by ONNX-GS, which does not have an autoinstall mechanism. @@ -410,7 +429,9 @@ def run_const_fold_pass(model): postfold_num_nodes = -1 index = 0 - while (prefold_num_nodes != postfold_num_nodes) and (self.num_passes is None or index < self.num_passes): + while (prefold_num_nodes != postfold_num_nodes) and ( + self.num_passes is None or index < self.num_passes + ): prefold_num_nodes = onnx_util.get_num_nodes(model) G_LOGGER.start(f"Folding Constants | Pass {index + 1}") @@ -419,7 +440,9 @@ def run_const_fold_pass(model): except Exception as err: if not self.error_ok: raise - G_LOGGER.warning(f"Constant folding pass failed. 
Skipping subsequent passes.\nNote: Error was:\n{err}") + G_LOGGER.warning( + f"Constant folding pass failed. Skipping subsequent passes.\nNote: Error was:\n{err}" + ) break else: postfold_num_nodes = onnx_util.get_num_nodes(model) @@ -484,12 +507,18 @@ def set_upper_bound(graph, target_tensor_list): assert len(tensor.inputs) == 1 producer = tensor.inputs[0] producer_idx = producer.outputs.index(tensor) - tensor_copy = gs.Variable(tensor.name + "_copy", dtype=tensor.dtype, shape=tensor.shape) + tensor_copy = gs.Variable( + tensor.name + "_copy", dtype=tensor.dtype, shape=tensor.shape + ) upper_bound_values = np.array(upper_bound) if tensor.shape is not None and len(tensor.shape) > 0: upper_bound_values = np.array([upper_bound] * len(tensor.shape)) - tensor_upper_bound = gs.Constant(tensor.name + "_upper_bound", values=upper_bound_values) - min_node = gs.Node(op="Min", inputs=[tensor_copy, tensor_upper_bound], outputs=[tensor]) + tensor_upper_bound = gs.Constant( + tensor.name + "_upper_bound", values=upper_bound_values + ) + min_node = gs.Node( + op="Min", inputs=[tensor_copy, tensor_upper_bound], outputs=[tensor] + ) producer.outputs[producer_idx] = tensor_copy tensor.inputs = [min_node] graph.nodes.append(min_node) @@ -573,7 +602,9 @@ def __init__( self.error_ok = util.default(error_ok, True) self.external_data_dir = external_data_dir # Subtract a little so we're below the real threshold - self.save_to_disk_threshold_bytes = util.default(save_to_disk_threshold_bytes, PROTOBUF_THRESHOLD) + self.save_to_disk_threshold_bytes = util.default( + save_to_disk_threshold_bytes, PROTOBUF_THRESHOLD + ) self.allow_onnxruntime = util.default(allow_onnxruntime, True) def _run_onnx_shape_inference(self, model, external_data_dir): @@ -603,7 +634,9 @@ def _run_onnx_shape_inference(self, model, external_data_dir): if isinstance(model, onnx.ModelProto): model = onnx.shape_inference.infer_shapes(model) else: - tmp_path = util.NamedTemporaryFile(prefix="tmp_polygraphy_", 
suffix=".onnx").name + tmp_path = util.NamedTemporaryFile( + prefix="tmp_polygraphy_", suffix=".onnx" + ).name G_LOGGER.verbose(f"Writing shape-inferred model to: {tmp_path}") onnx.shape_inference.infer_shapes_path(model, tmp_path) # In cases where the original model had external data stored in the same directory, @@ -611,14 +644,19 @@ def _run_onnx_shape_inference(self, model, external_data_dir): # In such cases, we need to use the model's directory as the external data path # for the newly generated model. model = onnx_from_path( - tmp_path, external_data_dir=util.default(external_data_dir, os.path.dirname(model) or None) + tmp_path, + external_data_dir=util.default( + external_data_dir, os.path.dirname(model) or None + ), ) return model def _run_onnxruntime_shape_inference(self, model, external_data_dir): if not isinstance(model, onnx.ModelProto): model = onnx_from_path(model, external_data_dir=external_data_dir) - return onnxrt_symbolic_shape_inference.SymbolicShapeInference.infer_shapes(model, auto_merge=True) + return onnxrt_symbolic_shape_inference.SymbolicShapeInference.infer_shapes( + model, auto_merge=True + ) @util.check_called_by("__call__") def call_impl(self): @@ -635,7 +673,9 @@ def call_impl(self): use_onnx_shape_inference = not self.allow_onnxruntime if self.allow_onnxruntime: try: - model = self._run_onnxruntime_shape_inference(model, external_data_dir) + model = self._run_onnxruntime_shape_inference( + model, external_data_dir + ) G_LOGGER.verbose( "Inferred shapes in the model with `onnxruntime.tools.symbolic_shape_infer`.\n" "Note: To force Polygraphy to use `onnx.shape_inference` instead, set `allow_onnxruntime=False` or " @@ -659,7 +699,9 @@ def call_impl(self): if not self.error_ok: raise G_LOGGER.warning(f"ONNX shape inference exited with an error:\n{err}") - G_LOGGER.internal_error(f"ONNX shape inference exited with an error:\n{err}") + G_LOGGER.internal_error( + f"ONNX shape inference exited with an error:\n{err}" + ) if not 
isinstance(model, onnx.ModelProto): model = onnx_from_path(model, external_data_dir=external_data_dir) @@ -675,7 +717,9 @@ class ExtractSubgraph(BaseLoader): Functor that extracts a subgraph from an ONNX model. """ - def __init__(self, model, input_metadata=None, output_metadata=None, check_meta=None): + def __init__( + self, model, input_metadata=None, output_metadata=None, check_meta=None + ): """ Extracts a subgraph from an ONNX model. @@ -722,7 +766,9 @@ def update_tensor(name, dtype, shape): # No need to update constants if isinstance(tensor, gs.Variable): tensor.dtype, tensor.shape = ( - DataType.to_dtype(DataType.from_dtype(dtype), "onnx") if dtype is not None else None + DataType.to_dtype(DataType.from_dtype(dtype), "onnx") + if dtype is not None + else None ) or tensor.dtype, shape or tensor.shape return tensor @@ -750,15 +796,24 @@ def check_meta(name, dtype, shape, meta_type, needs_shape=True): graph.outputs.clear() for name, (dtype, shape) in self.output_metadata.items(): tensor = update_tensor(name, dtype, shape) - check_meta(name, tensor.dtype, tensor.shape, "Output", needs_shape=False) + check_meta( + name, tensor.dtype, tensor.shape, "Output", needs_shape=False + ) graph.outputs.append(tensor) graph.cleanup() tensor_map = graph.tensors() for tensor in tensor_map.values(): - if isinstance(tensor, gs.Variable) and not tensor.inputs and tensor not in graph.inputs: - consumer_nodes = [f"Node: '{node.name}' (Op: {node.op})" for node in tensor.outputs] + if ( + isinstance(tensor, gs.Variable) + and not tensor.inputs + and tensor not in graph.inputs + ): + consumer_nodes = [ + f"Node: '{node.name}' (Op: {node.op})" + for node in tensor.outputs + ] G_LOGGER.error( f"Tensor: '{tensor.name}' is a variable tensor consumed by: {consumer_nodes}, " "but is not produced by a node or marked as a graph input." @@ -775,7 +830,14 @@ class SaveOnnx(BaseLoader): Functor that saves an ONNX model to the specified path. 
""" - def __init__(self, model, path, external_data_path=None, size_threshold=None, all_tensors_to_one_file=None): + def __init__( + self, + model, + path, + external_data_path=None, + size_threshold=None, + all_tensors_to_one_file=None, + ): """ Saves an ONNX model to the specified path. @@ -828,12 +890,16 @@ def call_impl(self): external_data_path = self.external_data_path if external_data_path is not None: - G_LOGGER.verbose(f"Saving external data for ONNX model to: {external_data_path}") + G_LOGGER.verbose( + f"Saving external data for ONNX model to: {external_data_path}" + ) try: onnx.external_data_helper.convert_model_to_external_data( model, location=external_data_path, - all_tensors_to_one_file=util.default(self.all_tensors_to_one_file, True), + all_tensors_to_one_file=util.default( + self.all_tensors_to_one_file, True + ), size_threshold=util.default(self.size_threshold, 1024), ) except TypeError: @@ -844,7 +910,9 @@ def call_impl(self): onnx.external_data_helper.convert_model_to_external_data( model, location=external_data_path, - all_tensors_to_one_file=util.default(self.all_tensors_to_one_file, True), + all_tensors_to_one_file=util.default( + self.all_tensors_to_one_file, True + ), ) else: if self.size_threshold is not None: diff --git a/tools/Polygraphy/polygraphy/backend/onnx/util.py b/tools/Polygraphy/polygraphy/backend/onnx/util.py index 1760cf19..a6eff90c 100644 --- a/tools/Polygraphy/polygraphy/backend/onnx/util.py +++ b/tools/Polygraphy/polygraphy/backend/onnx/util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -45,7 +45,12 @@ def _get_num_graph_nodes(graph): def all_tensor_names(model, include_inputs=None): include_inputs = util.default(include_inputs, False) - all_outputs = [output for node in model.graph.node if node.op_type != "Constant" for output in node.output] + all_outputs = [ + output + for node in model.graph.node + if node.op_type != "Constant" + for output in node.output + ] if include_inputs: all_outputs += [inp.name for inp in model.graph.input] all_outputs = util.unique_list(all_outputs) @@ -54,7 +59,9 @@ def all_tensor_names(model, include_inputs=None): def _check_has_tensors(model, outputs): all_outputs = all_tensor_names(model, include_inputs=True) - util.check_sequence_contains(all_outputs, outputs, name="the model", items_name="outputs", check_extra=False) + util.check_sequence_contains( + all_outputs, outputs, name="the model", items_name="outputs", check_extra=False + ) def mark_outputs(model, outputs): @@ -68,7 +75,9 @@ def mark_outputs(model, outputs): value_info_map = {t.name: t for t in model.graph.value_info} out_tensors = [] for output in outputs: - value_info = value_info_map.get(output, onnx.helper.make_empty_tensor_value_info(output)) + value_info = value_info_map.get( + output, onnx.helper.make_empty_tensor_value_info(output) + ) out_tensors.append(value_info) G_LOGGER.ultra_verbose(f"Marked output tensors in ONNX model: {out_tensors}") @@ -125,7 +134,9 @@ def get_values(tensor): try: return onnx_numpy_helper.to_array(tensor) except Exception as err: - G_LOGGER.error(f"Failed to load weights.\nNote: Error was: {err}", mode=LogMode.ONCE) + G_LOGGER.error( + f"Failed to load weights.\nNote: Error was: {err}", mode=LogMode.ONCE + ) return "" @@ -139,7 +150,9 @@ def get_tensor_metadata(tensors): def get_input_metadata(graph): # Some "inputs" are actually weights with initalizers, so we need to eliminate those. 
initializer_names = {tensor.name for tensor in graph.initializer} - input_tensors = [tensor for tensor in graph.input if tensor.name not in initializer_names] + input_tensors = [ + tensor for tensor in graph.input if tensor.name not in initializer_names + ] return get_tensor_metadata(input_tensors) @@ -182,12 +195,18 @@ def get_opset(): onnx_str += "\n\n" onnx_str += str_from_onnx_graph( - model.graph, tensors={}, show_layers=show_layers, show_attrs=show_attrs, show_weights=show_weights + model.graph, + tensors={}, + show_layers=show_layers, + show_attrs=show_attrs, + show_weights=show_weights, ) return onnx_str -def str_from_onnx_graph(graph, tensors, show_layers, show_attrs, show_weights, indent_level=0): +def str_from_onnx_graph( + graph, tensors, show_layers, show_attrs, show_weights, indent_level=0 +): input_metadata = get_input_metadata(graph) output_metadata = get_output_metadata(graph) initializer_metadata = get_tensor_metadata(graph.initializer) @@ -205,7 +224,9 @@ def str_from_onnx_graph(graph, tensors, show_layers, show_attrs, show_weights, i if show_attrs and graph.doc_string: onnx_str += f"---- Docstring ----\n{graph.doc_string}\n\n" - onnx_str += f"---- {len(input_metadata)} {graph_type} Input(s) ----\n{input_metadata}\n\n" + onnx_str += ( + f"---- {len(input_metadata)} {graph_type} Input(s) ----\n{input_metadata}\n\n" + ) onnx_str += f"---- {len(output_metadata)} {graph_type} Output(s) ----\n{output_metadata}\n\n" onnx_str += f"---- {len(initializer_metadata)} Initializer(s) ----\n" @@ -232,7 +253,12 @@ def get_names_and_meta(names): return names_lst, metadata # Maps values from the AttributeType enum to their string representations, e.g., {1: "FLOAT"} - ATTR_TYPE_MAPPING = dict(zip(onnx.AttributeProto.AttributeType.values(), onnx.AttributeProto.AttributeType.keys())) + ATTR_TYPE_MAPPING = dict( + zip( + onnx.AttributeProto.AttributeType.values(), + onnx.AttributeProto.AttributeType.keys(), + ) + ) # Maps an ONNX attribute to the corresponding Python 
property ONNX_PYTHON_ATTR_MAPPING = { @@ -257,7 +283,9 @@ def process_attr(attr_str: str): elif attr_str == "TENSOR": tensor_str = f"Tensor: [dtype={get_dtype(processed)}, shape={get_shape(processed)}]" if show_weights: - tensor_str += " | Values:\n" + util.indent_block(str(get_values(processed))) + tensor_str += " | Values:\n" + util.indent_block( + str(get_values(processed)) + ) processed = tensor_str elif attr_str == "GRAPH": processed = "\n" + str_from_onnx_graph( @@ -280,7 +308,9 @@ def process_attr(attr_str: str): if attr_str in ONNX_PYTHON_ATTR_MAPPING: attr_dict[attr.name] = process_attr(attr_str) else: - G_LOGGER.warning(f"Attribute of type {attr_str} is currently unsupported. Skipping attribute.") + G_LOGGER.warning( + f"Attribute of type {attr_str} is currently unsupported. Skipping attribute." + ) else: G_LOGGER.warning( f"Attribute type: {attr.type} was not recognized. Was the graph generated with a newer IR version than the installed `onnx` package? Skipping attribute." @@ -294,7 +324,14 @@ def process_attr(attr_str: str): output_names, output_meta = get_names_and_meta(node.output) onnx_str += util.str_from_layer( - "Node", index, node.name, node.op_type, input_names, input_meta, output_names, output_meta + "Node", + index, + node.name, + node.op_type, + input_names, + input_meta, + output_names, + output_meta, ) if show_attrs: @@ -333,7 +370,9 @@ def set_shapes_from_layerwise_meta(graph, layerwise_meta): for tensor in graph.tensors().values(): if isinstance(tensor, gs.Variable) and tensor.name in layerwise_meta: tensor.shape = layerwise_meta[tensor.name].shape - tensor.dtype = DataType.to_dtype(DataType.from_dtype(layerwise_meta[tensor.name].dtype), "onnx") + tensor.dtype = DataType.to_dtype( + DataType.from_dtype(layerwise_meta[tensor.name].dtype), "onnx" + ) def lower_constant_nodes(graph): @@ -383,7 +422,11 @@ def check_op(node, const_tensor_set): # Find all constant tensors. 
def get_const_tensors(graph): - return {tensor.name for tensor in graph.tensors().values() if isinstance(tensor, gs.Constant)} + return { + tensor.name + for tensor in graph.tensors().values() + if isinstance(tensor, gs.Constant) + } # Find all dynamic shape symbols, customers will set upper bounds for these symbols when building the model in TensorRT. def get_dynamic_shapes(graph): @@ -426,7 +469,10 @@ def get_target_tensors(graph): if check_node: target_tensor = check_op(node, const_tensor_set) # Avoid duplication. - if target_tensor is not None and target_tensor.name not in target_tensor_names: + if ( + target_tensor is not None + and target_tensor.name not in target_tensor_names + ): target_tensor_names.add(target_tensor.name) target_tensor_list.append(target_tensor) return target_tensor_list diff --git a/tools/Polygraphy/polygraphy/backend/onnxrt/loader.py b/tools/Polygraphy/polygraphy/backend/onnxrt/loader.py index b2eb4ab7..f8f1f4a4 100644 --- a/tools/Polygraphy/polygraphy/backend/onnxrt/loader.py +++ b/tools/Polygraphy/polygraphy/backend/onnxrt/loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -64,5 +64,7 @@ def call_impl(self): ) providers.append(matched_prov) - G_LOGGER.start(f"Creating ONNX-Runtime Inference Session with providers: {providers}") + G_LOGGER.start( + f"Creating ONNX-Runtime Inference Session with providers: {providers}" + ) return onnxrt.InferenceSession(model_bytes, providers=providers) diff --git a/tools/Polygraphy/polygraphy/backend/onnxrt/runner.py b/tools/Polygraphy/polygraphy/backend/onnxrt/runner.py index 2c4bf7a0..2b3b1c88 100644 --- a/tools/Polygraphy/polygraphy/backend/onnxrt/runner.py +++ b/tools/Polygraphy/polygraphy/backend/onnxrt/runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/backend/pluginref/references.py b/tools/Polygraphy/polygraphy/backend/pluginref/references.py index 2a50006c..1c7e842d 100644 --- a/tools/Polygraphy/polygraphy/backend/pluginref/references.py +++ b/tools/Polygraphy/polygraphy/backend/pluginref/references.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -53,7 +53,9 @@ def wrapped_func(node, intermediate_tensors): f"{op} reference implementation returned the wrong number of outputs.\nNote: Expected {len(node.outputs)} but recevied {len(outputs)}" ) - return {out_tensor.name: out for out_tensor, out in zip(node.outputs, outputs)} + return { + out_tensor.name: out for out_tensor, out in zip(node.outputs, outputs) + } OP_REGISTRY[op] = wrapped_func return wrapped_func diff --git a/tools/Polygraphy/polygraphy/backend/pluginref/runner.py b/tools/Polygraphy/polygraphy/backend/pluginref/runner.py index 150bc38d..e89f050f 100644 --- a/tools/Polygraphy/polygraphy/backend/pluginref/runner.py +++ b/tools/Polygraphy/polygraphy/backend/pluginref/runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -61,9 +61,13 @@ def infer_impl(self, feed_dict): intermediate_tensors = copy.copy(feed_dict) for node in self.graph.nodes: if node.op not in OP_REGISTRY: - G_LOGGER.critical(f"Op: {node.op} does not have a reference implementation registered!") + G_LOGGER.critical( + f"Op: {node.op} does not have a reference implementation registered!" 
+ ) - intermediate_tensors.update(OP_REGISTRY[node.op](node, intermediate_tensors)) + intermediate_tensors.update( + OP_REGISTRY[node.op](node, intermediate_tensors) + ) outputs = OrderedDict() for out in self.graph.outputs: diff --git a/tools/Polygraphy/polygraphy/backend/pyt/runner.py b/tools/Polygraphy/polygraphy/backend/pyt/runner.py index 4af81f1f..2240ddb1 100644 --- a/tools/Polygraphy/polygraphy/backend/pyt/runner.py +++ b/tools/Polygraphy/polygraphy/backend/pyt/runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -63,7 +63,9 @@ def infer_impl(self, feed_dict): with torch.no_grad(): inputs = [ torch.from_numpy(val.astype(dtype)).cuda() - for (val, (dtype, _)) in zip(feed_dict.values(), self.input_metadata.values()) + for (val, (dtype, _)) in zip( + feed_dict.values(), self.input_metadata.values() + ) ] start = time.time() outputs = self.model(*inputs) diff --git a/tools/Polygraphy/polygraphy/backend/tf/loader.py b/tools/Polygraphy/polygraphy/backend/tf/loader.py index 3b86dcab..49550623 100644 --- a/tools/Polygraphy/polygraphy/backend/tf/loader.py +++ b/tools/Polygraphy/polygraphy/backend/tf/loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -42,7 +42,11 @@ def __init__(self, graph): self._graph = graph def constfold(self, graphdef, output_names): - from tensorflow.core.protobuf import config_pb2, meta_graph_pb2, rewriter_config_pb2 + from tensorflow.core.protobuf import ( + config_pb2, + meta_graph_pb2, + rewriter_config_pb2, + ) from tensorflow.python.framework import importer, ops from tensorflow.python.grappler import tf_optimizer from tensorflow.python.training import saver @@ -55,12 +59,16 @@ def constfold(self, graphdef, output_names): output_list.append(output.encode("utf-8")) importer.import_graph_def(graphdef, name="") - metagraph = saver.export_meta_graph(graph_def=graph.as_graph_def(add_shapes=True), graph=graph) + metagraph = saver.export_meta_graph( + graph_def=graph.as_graph_def(add_shapes=True), graph=graph + ) metagraph.collection_def["train_op"].CopyFrom(output_collection) rewriter_config = rewriter_config_pb2.RewriterConfig() rewriter_config.optimizers.extend(["constfold"]) - rewriter_config.meta_optimizer_iterations = rewriter_config_pb2.RewriterConfig.ONE + rewriter_config.meta_optimizer_iterations = ( + rewriter_config_pb2.RewriterConfig.ONE + ) session_config = config_pb2.ConfigProto() session_config.graph_options.resave_options.CopyFrom(rewriter_config) @@ -109,7 +117,9 @@ def call_impl(self): # Strip port information from outputs output_names = [name.split(":")[0] for name in output_names] - output_graph_def = tf.graph_util.convert_variables_to_constants(sess, graphdef, output_names) + output_graph_def = tf.graph_util.convert_variables_to_constants( + sess, graphdef, output_names + ) output_graph_def = self.constfold(output_graph_def, output_names) return graph_from_frozen(output_graph_def) @@ -205,10 +215,14 @@ def call_impl(self): # # where "model" is the checkpoint name if not os.path.isdir(self.dir): - G_LOGGER.warning(f"Specified checkpoint directory: {self.dir} 
does not look like a directory.") + G_LOGGER.warning( + f"Specified checkpoint directory: {self.dir} does not look like a directory." + ) if self.name is None: - G_LOGGER.verbose("Checkpoint name was not explicitly provided, searching for `checkpoint` file") + G_LOGGER.verbose( + "Checkpoint name was not explicitly provided, searching for `checkpoint` file" + ) checkpoint = tf.train.get_checkpoint_state(self.dir) if checkpoint is None: ckpt_file_contents = '\nmodel_checkpoint_path: "model"\nall_model_checkpoint_paths: "model"\n' @@ -220,7 +234,9 @@ def call_impl(self): input_checkpoint = os.path.join(self.dir, self.name) meta_file = input_checkpoint + ".meta" - with tf.Graph().as_default() as graph, tf.compat.v1.Session(graph=graph).as_default() as sess: + with tf.Graph().as_default() as graph, tf.compat.v1.Session( + graph=graph + ).as_default() as sess: saver = tf.compat.v1.train.import_meta_graph(meta_file, clear_devices=True) saver.restore(sess, input_checkpoint) return graph, tf_util.get_graph_output_names(graph) @@ -386,7 +402,12 @@ def call_impl(self): if node.op == "TRTEngineOp": engine = node.attr["serialized_segment"].s if self.engine_dir is not None: - util.save_file(contents=engine, dest=os.path.join(self.engine_dir, f"segment-{segment_number}")) + util.save_file( + contents=engine, + dest=os.path.join( + self.engine_dir, f"segment-{segment_number}" + ), + ) segment_number += 1 return graph, outputs @@ -422,12 +443,17 @@ def call_impl(self): # Session configuration gpu_options = tf.compat.v1.GPUOptions( - per_process_gpu_memory_fraction=self.gpu_memory_fraction, allow_growth=self.allow_growth + per_process_gpu_memory_fraction=self.gpu_memory_fraction, + allow_growth=self.allow_growth, ) config = tf.compat.v1.ConfigProto(gpu_options=gpu_options) if self.use_xla: - config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1 - G_LOGGER.verbose(f"Using gpu memory fraction: {self.gpu_memory_fraction}, XLA: {self.use_xla}") + 
config.graph_options.optimizer_options.global_jit_level = ( + tf.OptimizerOptions.ON_1 + ) + G_LOGGER.verbose( + f"Using gpu memory fraction: {self.gpu_memory_fraction}, XLA: {self.use_xla}" + ) return config @@ -461,7 +487,9 @@ def call_impl(self): config, _ = util.invoke_if_callable(self.config) (graph, output_names), _ = util.invoke_if_callable(self.graph) - with graph.as_default() as graph, tf.compat.v1.Session(graph=graph, config=config).as_default() as sess: + with graph.as_default() as graph, tf.compat.v1.Session( + graph=graph, config=config + ).as_default() as sess: G_LOGGER.verbose(f"Using TensorFlow outputs: {output_names}") G_LOGGER.extra_verbose("Initializing variables in TensorFlow Graph") sess.run(tf.compat.v1.initializers.global_variables()) diff --git a/tools/Polygraphy/polygraphy/backend/tf/runner.py b/tools/Polygraphy/polygraphy/backend/tf/runner.py index 12099855..072598fc 100644 --- a/tools/Polygraphy/polygraphy/backend/tf/runner.py +++ b/tools/Polygraphy/polygraphy/backend/tf/runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -74,7 +74,10 @@ def infer_impl(self, feed_dict): G_LOGGER.extra_verbose(f"Received feed_dict: {feed_dict}") start = time.time() inference_outputs = self.sess.run( - self.output_names, feed_dict=feed_dict, options=self.run_options, run_metadata=self.run_metadata + self.output_names, + feed_dict=feed_dict, + options=self.run_options, + run_metadata=self.run_metadata, ) end = time.time() diff --git a/tools/Polygraphy/polygraphy/backend/tf/util.py b/tools/Polygraphy/polygraphy/backend/tf/util.py index 86d1e753..c1525635 100644 --- a/tools/Polygraphy/polygraphy/backend/tf/util.py +++ b/tools/Polygraphy/polygraphy/backend/tf/util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -46,7 +46,9 @@ def load_graph(path): graphdef.ParseFromString(util.load_file(path, description="GraphDef")) except google.protobuf.message.DecodeError: G_LOGGER.backtrace() - G_LOGGER.critical(f"Could not import TensorFlow GraphDef from: {path}. Is this a valid TensorFlow model?") + G_LOGGER.critical( + f"Could not import TensorFlow GraphDef from: {path}. Is this a valid TensorFlow model?" 
+ ) elif isinstance(path, tf.compat.v1.GraphDef): graphdef = path @@ -79,7 +81,9 @@ def get_tensor_metadata(tensors): metadata = TensorMetadata() for tensor in tensors: try: - shape = [elem.value if hasattr(elem, "value") else elem for elem in tensor.shape] + shape = [ + elem.value if hasattr(elem, "value") else elem for elem in tensor.shape + ] except ValueError: # Happens when rank is unknown shape = None @@ -90,7 +94,9 @@ def get_tensor_metadata(tensors): def get_input_metadata(graph): input_tensors = [] input_nodes = find_nodes_by_ops(graph.as_graph_def(), ["Placeholder", "FIFOQueue"]) - G_LOGGER.verbose(f"Found input tensors: {[f'{n.name}: {n.op}' for n in input_nodes]}") + G_LOGGER.verbose( + f"Found input tensors: {[f'{n.name}: {n.op}' for n in input_nodes]}" + ) for node in input_nodes: input_tensors.append(graph.get_tensor_by_name(node.name + ":0")) @@ -128,7 +134,9 @@ def is_output_node(node): # Additionally, we sometimes need to exclude entire namespaces e.g. while loops. EXCLUDE_NAMESPACES = ["while", "Assert"] - if any([ex_op in node.op for ex_op in EXCLUDE_OPS]) or any([ns in node.name for ns in EXCLUDE_NAMESPACES]): + if any([ex_op in node.op for ex_op in EXCLUDE_OPS]) or any( + [ns in node.name for ns in EXCLUDE_NAMESPACES] + ): G_LOGGER.extra_verbose( f"Excluding {node.name}, op {node.op} is not a valid output op or is part of an excluded namespace (Note: excluded namespaces: {EXCLUDE_NAMESPACES})" ) @@ -139,7 +147,9 @@ def is_output_node(node): # For layerwise mode, every layer becomes an output. if layerwise: output_nodes = list(graphdef.node) - G_LOGGER.verbose(f"Running in layerwise mode. Marking {len(output_nodes)} layers as potential outputs") + G_LOGGER.verbose( + f"Running in layerwise mode. 
Marking {len(output_nodes)} layers as potential outputs" + ) else: output_nodes = [node for node in graphdef.node if is_output_node(node)] G_LOGGER.extra_verbose(f"Found likely output nodes: {output_nodes}") @@ -157,7 +167,9 @@ def is_output_node(node): f"Excluded {len(output_nodes) - len(output_tensors)} ops that don't seem like outputs. Use -vv/--super-verbose, or set logging verbosity to EXTRA_VERBOSE to view them." ) - G_LOGGER.extra_verbose(f"Found output op types in graph: {set(tensor.op.type for tensor in output_tensors)}") + G_LOGGER.extra_verbose( + f"Found output op types in graph: {set(tensor.op.type for tensor in output_tensors)}" + ) G_LOGGER.verbose(f"Retrieved TensorFlow output_tensors: {output_tensors}") return get_tensor_metadata(output_tensors) @@ -176,7 +188,9 @@ def str_from_graph(graph, show_layers=None, show_attrs=None, show_weights=None): output_metadata = get_output_metadata(graph) graph_str += f"---- {len(input_metadata)} Graph Inputs ----\n{input_metadata}\n\n" - graph_str += f"---- {len(output_metadata)} Graph Outputs ----\n{output_metadata}\n\n" + graph_str += ( + f"---- {len(output_metadata)} Graph Outputs ----\n{output_metadata}\n\n" + ) graph_str += f"---- {len(graph.as_graph_def().node)} Nodes ----\n" if show_layers: G_LOGGER.warning( diff --git a/tools/Polygraphy/polygraphy/backend/trt/__init__.py b/tools/Polygraphy/polygraphy/backend/trt/__init__.py index 6755a8fa..c87af294 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/__init__.py +++ b/tools/Polygraphy/polygraphy/backend/trt/__init__.py @@ -1,6 +1,7 @@ from polygraphy.backend.trt.algorithm_selector import * from polygraphy.backend.trt.calibrator import * from polygraphy.backend.trt.config import * +from polygraphy.backend.trt.file_reader import * from polygraphy.backend.trt.loader import * from polygraphy.backend.trt.profile import * from polygraphy.backend.trt.runner import * diff --git a/tools/Polygraphy/polygraphy/backend/trt/algorithm_selector.py 
b/tools/Polygraphy/polygraphy/backend/trt/algorithm_selector.py index 35f4153c..e039c81b 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/algorithm_selector.py +++ b/tools/Polygraphy/polygraphy/backend/trt/algorithm_selector.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/backend/trt/calibrator.py b/tools/Polygraphy/polygraphy/backend/trt/calibrator.py index f30c907e..b9361e08 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/calibrator.py +++ b/tools/Polygraphy/polygraphy/backend/trt/calibrator.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,13 +15,15 @@ # limitations under the License. 
# import contextlib +import copy from collections import OrderedDict from polygraphy import mod, util +from polygraphy.backend.base import util as base_util +from polygraphy.backend.trt import util as trt_util +from polygraphy.datatype import DataType from polygraphy.exception import PolygraphyException from polygraphy.logger import G_LOGGER, LogMode -from polygraphy.backend.trt import util as trt_util -from polygraphy.backend.base import util as base_util trt = mod.lazy_import("tensorrt>=8.5") np = mod.lazy_import("numpy") @@ -29,7 +31,13 @@ @mod.export() def Calibrator( - data_loader, cache=None, BaseClass=None, batch_size=None, quantile=None, regression_cutoff=None, algo=None + data_loader, + cache=None, + BaseClass=None, + batch_size=None, + quantile=None, + regression_cutoff=None, + algo=None, ): """ Supplies calibration data to TensorRT to calibrate the network for INT8 inference. @@ -112,10 +120,25 @@ def set_input_metadata(self, input_metadata): using Polygraphy's included `DataLoader` to provide calibration data, or if data type and shape checking is desired. """ - self.input_metadata = input_metadata - if input_metadata is not None: + calibration_metadata = copy.copy(input_metadata) + for name, meta_tuple in calibration_metadata.items(): + if meta_tuple.dtype not in { + DataType.FLOAT32, + DataType.INT32, + DataType.INT64, + DataType.BOOL, + }: + G_LOGGER.warning( + f"TensorRT requires non-index calibration inputs to be provided in float32. " + f"Input: {name} has datatype: {meta_tuple.dtype}, so will override to float32 in the calibrator's metadata. " + f"If you are using a custom data loader with the calibrator, please ensure that you return a float32 tensor for this input." 
+ ) + meta_tuple.dtype = DataType.FLOAT32 + + self.input_metadata = calibration_metadata + if calibration_metadata is not None: with contextlib.suppress(AttributeError): - self.data_loader.input_metadata = input_metadata + self.data_loader.input_metadata = calibration_metadata def reset(self): """ @@ -160,7 +183,9 @@ def _get_batch_impl(self, names): if isinstance(buf, int): ptrs.append(buf) else: - ptrs.append(trt_util._get_array_on_gpu(buf, name, self.device_buffers)) + ptrs.append( + trt_util._get_array_on_gpu(buf, name, self.device_buffers) + ) return ptrs @@ -182,7 +207,9 @@ def load_from_cache(): try: return util.load_file(self._cache, description="calibration cache") except Exception as err: - G_LOGGER.error(f"Could not read from calibration cache: {self._cache}\nNote: Error was: {err}") + G_LOGGER.error( + f"Could not read from calibration cache: {self._cache}\nNote: Error was: {err}" + ) return None if self.cache_contents is not None: @@ -208,9 +235,15 @@ def write_calibration_cache(self, cache): return try: - util.save_file(contents=self.cache_contents, dest=self._cache, description="calibration cache") + util.save_file( + contents=self.cache_contents, + dest=self._cache, + description="calibration cache", + ) except Exception as err: - G_LOGGER.error(f"Could not write to calibration cache: {self._cache}.\nNote: Error was: {err}") + G_LOGGER.error( + f"Could not write to calibration cache: {self._cache}.\nNote: Error was: {err}" + ) def free(self): """ diff --git a/tools/Polygraphy/polygraphy/backend/trt/config.py b/tools/Polygraphy/polygraphy/backend/trt/config.py index 41fc14c3..317deec0 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/config.py +++ b/tools/Polygraphy/polygraphy/backend/trt/config.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/backend/trt/file_reader.py b/tools/Polygraphy/polygraphy/backend/trt/file_reader.py new file mode 100644 index 00000000..c46051ac --- /dev/null +++ b/tools/Polygraphy/polygraphy/backend/trt/file_reader.py @@ -0,0 +1,80 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from pathlib import Path + +from polygraphy import mod, util +from polygraphy.logger import G_LOGGER + +trt = mod.lazy_import("tensorrt>=10.0") + +@mod.export() +def FileReader( + filepath, + BaseClass=None, +): + """ + Class that supplies data to TensorRT from a stream. This may help reduce memory usage during deserialization. + + Args: + filepath (str): + The path to the serialized file. + + """ + BaseClass = util.default(BaseClass, trt.IStreamReader) + + class FileReaderClass(BaseClass): + """ + Class that supplies data to TensorRT from a stream. This may help reduce memory usage during deserialization. + """ + + def __init__(self): + # Must explicitly initialize parent for any trampoline class! Will mysteriously segfault without this. 
+ BaseClass.__init__(self) # type: ignore + + self.filepath = filepath + + if not Path(self.filepath).exists(): + G_LOGGER.error(f"File at {self.filepath} does not exist!") + + self.mode = 'rb' + self.file = open(self.filepath, self.mode) + if not self.file: + G_LOGGER.error(f"Failed to open file at {self.filepath}!") + + self.make_func = FileReader + + def read(self, size: int) -> bytes: + return self.file.read(size) + + def free(self): + if self.file: + self.file.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.free() + + def __repr__(self): + return util.make_repr( + "FileReader", + self.filepath, + BaseClass=BaseClass, + )[0] + + return FileReaderClass() diff --git a/tools/Polygraphy/polygraphy/backend/trt/loader.py b/tools/Polygraphy/polygraphy/backend/trt/loader.py index bb46766e..2fb4bd41 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/loader.py +++ b/tools/Polygraphy/polygraphy/backend/trt/loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,7 +19,7 @@ from polygraphy import constants, mod, util from polygraphy.backend.base import BaseLoader -from polygraphy.backend.trt import util as trt_util +from polygraphy.backend.trt import util as trt_util, FileReader from polygraphy.backend.trt.config import CreateConfig from polygraphy.datatype import DataType from polygraphy.logger import G_LOGGER @@ -91,7 +91,10 @@ def __init__(self, explicit_batch=None, strongly_typed=None): Whether to mark the network as being strongly typed. Defaults to False. 
""" - self.explicit_batch = util.default(explicit_batch, True if mod.version(trt.__version__) < mod.version("10.0") else None) + self.explicit_batch = util.default( + explicit_batch, + True if mod.version(trt.__version__) < mod.version("10.0") else None, + ) self.strongly_typed = util.default(strongly_typed, False) @util.check_called_by("__call__") @@ -105,13 +108,17 @@ def call_impl(self): if self.explicit_batch: try: - network_flags |= 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network_flags |= 1 << int( + trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH + ) except AttributeError: trt_util.fail_unavailable("explicit_batch") if self.strongly_typed: try: - network_flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) + network_flags |= 1 << int( + trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED + ) except AttributeError: trt_util.fail_unavailable("strongly_typed") @@ -138,7 +145,7 @@ def __init__(self, flags=None, plugin_instancenorm=None, strongly_typed=None): Defaults to False. """ self.flags = flags - self.plugin_instancenorm=util.default(plugin_instancenorm, False) + self.plugin_instancenorm = util.default(plugin_instancenorm, False) self.strongly_typed = util.default(strongly_typed, False) @util.check_called_by("__call__") @@ -165,7 +172,9 @@ class NetworkFromOnnxBytes(BaseNetworkFromOnnx): Functor that parses an ONNX model to create a trt.INetworkDefinition. """ - def __init__(self, model_bytes, flags=None, plugin_instancenorm=None, strongly_typed=None): + def __init__( + self, model_bytes, flags=None, plugin_instancenorm=None, strongly_typed=None + ): """ Parses an ONNX model. @@ -185,7 +194,11 @@ def __init__(self, model_bytes, flags=None, plugin_instancenorm=None, strongly_t Whether to mark the network as being strongly typed. Defaults to False. 
""" - super().__init__(flags=flags, plugin_instancenorm=plugin_instancenorm, strongly_typed=strongly_typed) + super().__init__( + flags=flags, + plugin_instancenorm=plugin_instancenorm, + strongly_typed=strongly_typed, + ) self._model_bytes = model_bytes @util.check_called_by("__call__") @@ -228,7 +241,11 @@ def __init__(self, path, flags=None, plugin_instancenorm=None, strongly_typed=No Whether to mark the network as being strongly typed. Defaults to False. """ - super().__init__(flags=flags, plugin_instancenorm=plugin_instancenorm, strongly_typed=strongly_typed) + super().__init__( + flags=flags, + plugin_instancenorm=plugin_instancenorm, + strongly_typed=strongly_typed, + ) self.path = path @util.check_called_by("__call__") @@ -272,7 +289,9 @@ def __init__(self, network, func, name=None): # Sanity-check that the function passed in is callable if not callable(func): - G_LOGGER.critical(f"Object {func} (of type {type(func)}) is not a callable.") + G_LOGGER.critical( + f"Object {func} (of type {type(func)}) is not a callable." + ) try: func_name = func.__name__ @@ -334,7 +353,9 @@ def __init__(self, network, outputs=None, exclude_outputs=None): Names of tensors to exclude as outputs. This can be useful in conjunction with ``outputs=constants.MARK_ALL`` to omit outputs. 
""" - func = lambda network: ModifyNetworkOutputs._apply(network, outputs, exclude_outputs) + func = lambda network: ModifyNetworkOutputs._apply( + network, outputs, exclude_outputs + ) super().__init__(network, func, "ModifyNetworkOutputs") @@ -536,12 +557,17 @@ def call_impl(self): network, show_layers=True, show_attrs=True, - show_weights=G_LOGGER.module_severity.get(G_LOGGER.module_path(__file__)) <= G_LOGGER.ULTRA_VERBOSE, + show_weights=G_LOGGER.module_severity.get( + G_LOGGER.module_path(__file__) + ) + <= G_LOGGER.ULTRA_VERBOSE, ) ) ) - G_LOGGER.start(f"Building engine with configuration:\n{trt_util.str_from_config(config)}") + G_LOGGER.start( + f"Building engine with configuration:\n{trt_util.str_from_config(config)}" + ) start_time = time.time() try: @@ -549,14 +575,20 @@ def call_impl(self): except AttributeError: engine = builder.build_engine(network, config) if not engine: - G_LOGGER.critical("Invalid Engine. Please ensure the engine was built correctly") + G_LOGGER.critical( + "Invalid Engine. Please ensure the engine was built correctly" + ) engine_bytes = engine.serialize() end_time = time.time() if not engine_bytes: - G_LOGGER.critical("Invalid Engine. Please ensure the engine was built correctly") + G_LOGGER.critical( + "Invalid Engine. 
Please ensure the engine was built correctly" + ) - G_LOGGER.finish(f"Finished engine building in {end_time - start_time:.3f} seconds") + G_LOGGER.finish( + f"Finished engine building in {end_time - start_time:.3f} seconds" + ) if self.timing_cache_path: try: @@ -566,18 +598,28 @@ def call_impl(self): with util.LockFile(self.timing_cache_path): try: - prev_cache = config.create_timing_cache(util.load_file(self.timing_cache_path)) + prev_cache = config.create_timing_cache( + util.load_file(self.timing_cache_path) + ) except: prev_cache = None if timing_cache: if prev_cache is not None: - combine_success = timing_cache.combine(prev_cache, ignore_mismatch=True) + combine_success = timing_cache.combine( + prev_cache, ignore_mismatch=True + ) if not combine_success: - G_LOGGER.warning("Could not combine old timing cache into current timing cache") + G_LOGGER.warning( + "Could not combine old timing cache into current timing cache" + ) with timing_cache.serialize() as buffer: - util.save_file(buffer, self.timing_cache_path, description="tactic timing cache") + util.save_file( + buffer, + self.timing_cache_path, + description="tactic timing cache", + ) return engine_bytes @@ -642,7 +684,9 @@ def __init__(self, serialized_engine, runtime=None): If no runtime is provided, one will be created. """ self._serialized_engine = serialized_engine - self._runtime = util.default(runtime, lambda: trt.Runtime(trt_util.get_trt_logger())) + self._runtime = util.default( + runtime, lambda: trt.Runtime(trt_util.get_trt_logger()) + ) @util.check_called_by("__call__") def call_impl(self): @@ -666,6 +710,51 @@ def call_impl(self): return engine +@mod.export(funcify=True) +class EngineFromPath(BaseLoader): + """ + Functor that deserializes an engine from a path. + """ + + def __init__(self, path: str, runtime=None): + """ + Deserializes an engine from a path. + + Args: + path (Union[str, Callable() -> str]): + The file path to the serialized engine or a callable that returns it. 
+ runtime (Union[trt.Runtime, Callable() -> trt.Runtime]): + The runtime to use when deserializing the engine or a callable that returns one. + If no runtime is provided, one will be created. + """ + self._path = path + self._runtime = util.default( + runtime, lambda: trt.Runtime(trt_util.get_trt_logger()) + ) + + @util.check_called_by("__call__") + def call_impl(self): + """ + Returns: + trt.ICudaEngine: The deserialized engine. + """ + path, _ = util.invoke_if_callable(self._path) + runtime, _ = util.invoke_if_callable(self._runtime) + + trt.init_libnvinfer_plugins(trt_util.get_trt_logger(), "") + try: + # To deserialize version compatible engines, we must signal the runtime that host code is allowed + runtime.engine_host_code_allowed = True + except AttributeError: + pass + + file_reader = FileReader(path) + engine = runtime.deserialize_cuda_engine(file_reader) + if not engine: + G_LOGGER.critical("Could not deserialize engine. See log for details.") + return engine + + @mod.export(funcify=True) class BytesFromEngine(BaseLoader): """ @@ -719,7 +808,9 @@ def call_impl(self): """ engine, _ = util.invoke_if_callable(self._engine) - util.save_file(contents=bytes_from_engine(engine), dest=self.path, description="engine") + util.save_file( + contents=bytes_from_engine(engine), dest=self.path, description="engine" + ) return engine @@ -767,13 +858,19 @@ def tensors_from_names_meta(names, meta): for name in names: if name not in tensor_map: dtype, shape = meta[name] - tensor_map[name] = gs.Variable(name=name, dtype=DataType.to_dtype(dtype, "onnx"), shape=shape) + tensor_map[name] = gs.Variable( + name=name, dtype=DataType.to_dtype(dtype, "onnx"), shape=shape + ) tensors.append(tensor_map[name]) return tensors nodes = [] - graph_inputs = tensors_from_names_meta(*trt_util.get_network_input_names_meta(network)) - graph_outputs = tensors_from_names_meta(*trt_util.get_network_output_names_meta(network)) + graph_inputs = tensors_from_names_meta( + 
*trt_util.get_network_input_names_meta(network) + ) + graph_outputs = tensors_from_names_meta( + *trt_util.get_network_output_names_meta(network) + ) LAYER_TYPE_CLASS_MAPPING = trt_util.get_layer_class_mapping() @@ -782,8 +879,12 @@ def tensors_from_names_meta(names, meta): if layer.type in LAYER_TYPE_CLASS_MAPPING: layer.__class__ = LAYER_TYPE_CLASS_MAPPING[layer.type] - node_inputs = tensors_from_names_meta(*trt_util.get_layer_input_names_meta(layer)) - node_outputs = tensors_from_names_meta(*trt_util.get_layer_output_names_meta(layer)) + node_inputs = tensors_from_names_meta( + *trt_util.get_layer_input_names_meta(layer) + ) + node_outputs = tensors_from_names_meta( + *trt_util.get_layer_output_names_meta(layer) + ) attrs = {} attr_names = trt_util.get_layer_attribute_names(layer) for name in attr_names: @@ -793,7 +894,9 @@ def tensors_from_names_meta(names, meta): except Exception as err: attr = f"" - if util.is_sequence(attr) or any(isinstance(attr, cls) for cls in [trt.Dims, trt.Permutation]): + if util.is_sequence(attr) or any( + isinstance(attr, cls) for cls in [trt.Dims, trt.Permutation] + ): try: attr = list(attr) except ValueError: # Invalid dims @@ -817,12 +920,23 @@ def tensors_from_names_meta(names, meta): attrs[name] = attr - nodes.append(gs.Node(name=layer.name, op=op_name, attrs=attrs, inputs=node_inputs, outputs=node_outputs)) + nodes.append( + gs.Node( + name=layer.name, + op=op_name, + attrs=attrs, + inputs=node_inputs, + outputs=node_outputs, + ) + ) - graph = gs.Graph(name=network.name, inputs=graph_inputs, outputs=graph_outputs, nodes=nodes) + graph = gs.Graph( + name=network.name, inputs=graph_inputs, outputs=graph_outputs, nodes=nodes + ) return gs.export_onnx(graph) + @mod.export(funcify=True) class MarkDebug(PostprocessNetwork): """ diff --git a/tools/Polygraphy/polygraphy/backend/trt/profile.py b/tools/Polygraphy/polygraphy/backend/trt/profile.py index 0c74315a..0449c97e 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/profile.py +++ 
b/tools/Polygraphy/polygraphy/backend/trt/profile.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -85,7 +85,9 @@ def __getitem__(self, key): corresponding to the input. """ if key not in self: - G_LOGGER.critical(f"Binding: {key} does not have shapes set in this profile") + G_LOGGER.critical( + f"Binding: {key} does not have shapes set in this profile" + ) return super().__getitem__(key) def fill_defaults(self, network, default_shape_value=None): @@ -103,7 +105,9 @@ def fill_defaults(self, network, default_shape_value=None): Returns: Profile: Self """ - default_shape_value = util.default(default_shape_value, constants.DEFAULT_SHAPE_VALUE) + default_shape_value = util.default( + default_shape_value, constants.DEFAULT_SHAPE_VALUE + ) for idx in range(network.num_inputs): inp = network.get_input(idx) @@ -170,7 +174,9 @@ def to_trt(self, builder, network): if is_shape_tensor: if inp.name in self: shapes = self[inp.name] - trt_profile.set_shape_input(inp.name, shapes.min, shapes.opt, shapes.max) + trt_profile.set_shape_input( + inp.name, shapes.min, shapes.opt, shapes.max + ) G_LOGGER.verbose( f"{trt_util.str_from_tensor(inp, is_shape_tensor)} | Setting input shape-tensor value range to: {shapes}" ) diff --git a/tools/Polygraphy/polygraphy/backend/trt/runner.py b/tools/Polygraphy/polygraphy/backend/trt/runner.py index faf2daad..ef77aa79 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/runner.py +++ b/tools/Polygraphy/polygraphy/backend/trt/runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -44,14 +44,16 @@ def process_debug_tensor(self, addr, location, type, shape, name, stream): cuda.wrapper().memcpy( dst=util.array.data_ptr(buffer), src=addr, - nbytes=size*datatype.itemsize, + nbytes=size * datatype.itemsize, kind=cuda.MemcpyKind.DeviceToHost, - stream_ptr=stream) + stream_ptr=stream, + ) cuda.wrapper().stream_synchronize(stream) self.debug_tensor_outputs[name] = util.array.resize_or_reallocate(buffer, shape) return DebugTensorWriter() + def _make_output_allocator(): class OutputAllocator(trt.IOutputAllocator): @@ -79,7 +81,7 @@ def notify_shape(self, tensor_name, shape): def set_use_torch(self, use_torch): self.use_torch = use_torch - + return OutputAllocator() @@ -138,7 +140,15 @@ class TrtRunner(BaseRunner): be used only for prototyping, testing, and debugging. """ - def __init__(self, engine, name: str = None, optimization_profile: int = None, allocation_strategy: str = None, weight_streaming_budget: int = None, weight_streaming_percent: float = None): + def __init__( + self, + engine, + name: str = None, + optimization_profile: int = None, + allocation_strategy: str = None, + weight_streaming_budget: int = None, + weight_streaming_percent: float = None, + ): """ Args: engine (Union[Union[trt.ICudaEngine, trt.IExecutionContext], Callable() -> Union[trt.ICudaEngine, trt.IExecutionContext]]): @@ -173,7 +183,6 @@ def __init__(self, engine, name: str = None, optimization_profile: int = None, a self.allocation_strategy = allocation_strategy self.weight_streaming_budget = weight_streaming_budget self.weight_streaming_percent = weight_streaming_percent - self.output_allocator = _make_output_allocator() @util.check_called_by("activate") def activate_impl(self): @@ -193,7 +202,7 @@ def activate_impl(self): elif self.weight_streaming_percent is not None: assert 0 <= self.weight_streaming_percent <= 100 if self.weight_streaming_percent == 0: - 
budget_bytes = 0 # Disable weight streaming + budget_bytes = 0 # Disable weight streaming else: min_budget = self.engine.minimum_weight_streaming_budget max_budget = self.engine.streamable_weights_size @@ -209,15 +218,15 @@ def activate_impl(self): G_LOGGER.info(f"Weight streaming is enabled with TensorRT automatically determiing the budget.") else: G_LOGGER.info(f"Weight streaming is enabled with a memory budget of {budget_bytes} bytes.") - + allocation_strategy = util.default(self.allocation_strategy, "static") - if allocation_strategy == 'static': + if allocation_strategy == "static": self.context = self.engine.create_execution_context() - elif allocation_strategy in ['profile', 'runtime']: + elif allocation_strategy in ["profile", "runtime"]: # Device memory will be managed by polygraphy self.context = self.engine.create_execution_context(trt.ExecutionContextAllocationStrategy.USER_MANAGED) else: - G_LOGGER.critical("Invalid allocation strategy specified.") + G_LOGGER.critical("Invalid allocation strategy specified.") if not self.context: G_LOGGER.critical("Invalid Context. See error log for details.") elif isinstance(engine_or_context, trt.IExecutionContext): @@ -237,6 +246,7 @@ def activate_impl(self): self.host_output_buffers = OrderedDict() self.stream = cuda.Stream() self.context_memory_buffer = None + self.output_allocator = _make_output_allocator() if self.optimization_profile is not None: self.set_profile(self.optimization_profile) @@ -342,8 +352,10 @@ def get_io(mode): if self.allocation_strategy in ["profile", "runtime"]: if self.allocation_strategy == "profile": # Perform per-profile allocation. - size_to_allocate = self.engine.get_device_memory_size_for_profile(self.context.active_optimization_profile) - elif self.allocation_strategy =="runtime": + size_to_allocate = self.engine.get_device_memory_size_for_profile( + self.context.active_optimization_profile + ) + elif self.allocation_strategy == "runtime": # Perform runtime allocation. 
size_to_allocate = self.context.update_device_memory_size_for_shapes() @@ -378,7 +390,12 @@ def get_io(mode): if copy_outputs_to_host: raw_array = _get_array_on_cpu( - raw_array, name, self.host_output_buffers, self.stream, nbytes, use_torch=use_torch + raw_array, + name, + self.host_output_buffers, + self.stream, + nbytes, + use_torch=use_torch, ) if using_vectorized_format: @@ -448,4 +465,5 @@ def deactivate_impl(self): self.host_output_buffers, self.stream, self.context_memory_buffer, + self.output_allocator, ) diff --git a/tools/Polygraphy/polygraphy/backend/trt/util.py b/tools/Polygraphy/polygraphy/backend/trt/util.py index ec58d2eb..6eb69456 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/util.py +++ b/tools/Polygraphy/polygraphy/backend/trt/util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -791,9 +791,11 @@ def dtype_from_fmt_dtype(contents): name=elem["Name"], dtype=dtype_from_fmt_dtype(elem["Format/Datatype"]), shape=elem["Dimensions"], - docstring=f"Format: {elem['Format/Datatype']}" - if "N/A" not in elem["Format/Datatype"] - else None, + docstring=( + f"Format: {elem['Format/Datatype']}" + if "N/A" not in elem["Format/Datatype"] + else None + ), ) return names, meta diff --git a/tools/Polygraphy/polygraphy/common/interface.py b/tools/Polygraphy/polygraphy/common/interface.py index 0b368015..09d1a66f 100644 --- a/tools/Polygraphy/polygraphy/common/interface.py +++ b/tools/Polygraphy/polygraphy/common/interface.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/common/struct.py b/tools/Polygraphy/polygraphy/common/struct.py index 1e99bcac..cd60d43c 100644 --- a/tools/Polygraphy/polygraphy/common/struct.py +++ b/tools/Polygraphy/polygraphy/common/struct.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -129,7 +129,13 @@ def add(self, name, dtype, shape, min_shape=None, max_shape=None, docstring=None The newly added entry. """ self[name] = MetadataTuple( - dtype, BoundedShape(shape, min=min_shape, max=max_shape) if shape is not None else None, docstring + dtype, + ( + BoundedShape(shape, min=min_shape, max=max_shape) + if shape is not None + else None + ), + docstring, ) return self diff --git a/tools/Polygraphy/polygraphy/comparator/comparator.py b/tools/Polygraphy/polygraphy/comparator/comparator.py index 282050d0..7a70a9a9 100644 --- a/tools/Polygraphy/polygraphy/comparator/comparator.py +++ b/tools/Polygraphy/polygraphy/comparator/comparator.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -96,10 +96,14 @@ def execute_runner(runner, loader_cache): with runner as active_runner: # DataLoaderCache will ensure that the feed_dict does not contain any extra entries # based on the provided input_metadata. 
- loader_cache.set_input_metadata(active_runner.get_input_metadata(use_numpy_dtypes=False)) + loader_cache.set_input_metadata( + active_runner.get_input_metadata(use_numpy_dtypes=False) + ) if warm_up: - G_LOGGER.start(f"{active_runner.name:35} | Running {warm_up} warm-up run(s)") + G_LOGGER.start( + f"{active_runner.name:35} | Running {warm_up} warm-up run(s)" + ) try: feed_dict = loader_cache[0] except IndexError: @@ -107,11 +111,15 @@ def execute_runner(runner, loader_cache): f"{warm_up} warm-up run(s) were requested, but data loader did not supply any data. Skipping warm-up run(s)" ) else: - G_LOGGER.ultra_verbose(f"Warm-up Input Buffers:\n{util.indent_block(feed_dict)}") + G_LOGGER.ultra_verbose( + f"Warm-up Input Buffers:\n{util.indent_block(feed_dict)}" + ) # First do a few warm-up runs, and don't time them. for _ in range(warm_up): active_runner.infer(feed_dict=feed_dict) - G_LOGGER.finish(f"{active_runner.name:35} | Finished {warm_up} warm-up run(s)") + G_LOGGER.finish( + f"{active_runner.name:35} | Finished {warm_up} warm-up run(s)" + ) # Then, actual iterations. 
index = 0 @@ -133,7 +141,11 @@ def execute_runner(runner, loader_cache): total_runtime += runtime # Without a deep copy here, outputs will always reference the output of the last run iteration_results.append( - IterationResult(outputs=copy.deepcopy(outputs), runtime=runtime, runner_name=active_runner.name) + IterationResult( + outputs=copy.deepcopy(outputs), + runtime=runtime, + runner_name=active_runner.name, + ) ) G_LOGGER.info( @@ -175,7 +187,10 @@ def execute_runner_with_queue(runner_queue, runner, loader_cache): G_LOGGER.start(f"{runner.name:35} | Activating and starting inference") if use_subprocess: runner_queue = Queue() - process = Process(target=execute_runner_with_queue, args=(runner_queue, runner, loader_cache)) + process = Process( + target=execute_runner_with_queue, + args=(runner_queue, runner, loader_cache), + ) process.start() # If a subprocess hangs in a certain way, then process.join could block forever. Hence, @@ -187,7 +202,9 @@ def execute_runner_with_queue(runner_queue, runner, loader_cache): runner_queue, timeout=subprocess_polling_interval / 2 ) # Receive updated loader cache, or fall back if it could not be sent. - loader_cache = util.try_receive_on_queue(runner_queue, timeout=subprocess_polling_interval / 2) + loader_cache = util.try_receive_on_queue( + runner_queue, timeout=subprocess_polling_interval / 2 + ) except queue.Empty: G_LOGGER.extra_verbose("Polled subprocess - still running") @@ -227,7 +244,9 @@ def postprocess(run_results, postprocess_func): Returns: RunResults: The updated run results. 
""" - G_LOGGER.start(f"Applying post-processing to outputs: {postprocess_func.__name__}") + G_LOGGER.start( + f"Applying post-processing to outputs: {postprocess_func.__name__}" + ) for _, iteration_results in run_results: for index, iter_res in enumerate(iteration_results): iteration_results[index] = postprocess_func(iter_res) @@ -240,7 +259,9 @@ def default_comparisons(run_results): return [(i, i + 1) for i in range(len(run_results) - 1)] @staticmethod - def compare_accuracy(run_results, fail_fast=False, comparisons=None, compare_func=None): + def compare_accuracy( + run_results, fail_fast=False, comparisons=None, compare_func=None + ): """ Args: run_results (RunResults): The result of Comparator.run() @@ -268,11 +289,16 @@ def find_mismatched(match_dict): return [name for name, matched in match_dict.items() if not bool(matched)] compare_func = util.default(compare_func, CompareFunc.simple()) - comparisons = util.default(comparisons, Comparator.default_comparisons(run_results)) + comparisons = util.default( + comparisons, Comparator.default_comparisons(run_results) + ) accuracy_result = AccuracyResult() for runner0_index, runner1_index in comparisons: - (runner0_name, results0), (runner1_name, results1) = run_results[runner0_index], run_results[runner1_index] + (runner0_name, results0), (runner1_name, results1) = ( + run_results[runner0_index], + run_results[runner1_index], + ) G_LOGGER.start(f"Accuracy Comparison | {runner0_name} vs. 
{runner1_name}") with G_LOGGER.indent(): @@ -293,7 +319,9 @@ def find_mismatched(match_dict): if fail_fast and mismatched_outputs: return accuracy_result - G_LOGGER.extra_verbose(f"Finished comparing {runner0_name} with {runner1_name}") + G_LOGGER.extra_verbose( + f"Finished comparing {runner0_name} with {runner1_name}" + ) passed, _, total = accuracy_result.stats(runner_pair) pass_rate = accuracy_result.percentage(runner_pair) * 100.0 @@ -327,20 +355,26 @@ def validate(run_results, check_inf=None, check_nan=None, fail_fast=None): def is_finite(output): non_finite = util.array.logical_not(util.array.isfinite(output)) if util.array.any(non_finite): - G_LOGGER.error("Inf Detected | One or more non-finite values were encountered in this output") + G_LOGGER.error( + "Inf Detected | One or more non-finite values were encountered in this output" + ) G_LOGGER.info( "Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display non-finite values", mode=LogMode.ONCE, ) G_LOGGER.extra_verbose(f"Note: non-finite values at:\n{non_finite}") - G_LOGGER.extra_verbose(f"Note: non-finite values:\n{output[non_finite]}") + G_LOGGER.extra_verbose( + f"Note: non-finite values:\n{output[non_finite]}" + ) return False return True def is_not_nan(output): nans = util.array.isnan(output) if util.array.any(nans): - G_LOGGER.error("NaN Detected | One or more NaNs were encountered in this output") + G_LOGGER.error( + "NaN Detected | One or more NaNs were encountered in this output" + ) G_LOGGER.info( "Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display locations of NaNs", mode=LogMode.ONCE, diff --git a/tools/Polygraphy/polygraphy/comparator/compare.py b/tools/Polygraphy/polygraphy/comparator/compare.py index fa7cb7c7..b4dc362a 100644 --- a/tools/Polygraphy/polygraphy/comparator/compare.py +++ b/tools/Polygraphy/polygraphy/comparator/compare.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/comparator/data_loader.py b/tools/Polygraphy/polygraphy/comparator/data_loader.py index 7dc11cc2..e5dbbc62 100644 --- a/tools/Polygraphy/polygraphy/comparator/data_loader.py +++ b/tools/Polygraphy/polygraphy/comparator/data_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -47,20 +47,20 @@ def __init__(self, data_loader_backend_module, seed): self.data_loader_backend_module = data_loader_backend_module - if self.data_loader_backend_module == "numpy": self.rng = np.random.RandomState(seed) elif self.data_loader_backend_module == "torch": self.rng = torch.Generator() self.rng.manual_seed(seed) - def sample_integer(self, shape, dtype, low, high): """ Samples an array containing integral values in the range [low, high], inclusive """ dtype = ( - DataType.to_dtype(DataType.from_dtype(dtype), self.data_loader_backend_module) + DataType.to_dtype( + DataType.from_dtype(dtype), self.data_loader_backend_module + ) if dtype is not None else dtype ) @@ -87,7 +87,9 @@ def sample_float(self, shape, dtype, fmin, fmax): scale = fmax dtype = ( - DataType.to_dtype(DataType.from_dtype(dtype), self.data_loader_backend_module) + DataType.to_dtype( + DataType.from_dtype(dtype), self.data_loader_backend_module + ) if dtype is not None else dtype ) @@ -100,7 +102,9 @@ def sample_float(self, shape, dtype, fmin, fmax): def constant_array(self, shape, dtype): dtype = ( - DataType.to_dtype(DataType.from_dtype(dtype), self.data_loader_backend_module) + 
DataType.to_dtype( + DataType.from_dtype(dtype), self.data_loader_backend_module + ) if dtype is not None else dtype ) diff --git a/tools/Polygraphy/polygraphy/comparator/postprocess.py b/tools/Polygraphy/polygraphy/comparator/postprocess.py index 0ba09eb0..1689a0a4 100644 --- a/tools/Polygraphy/polygraphy/comparator/postprocess.py +++ b/tools/Polygraphy/polygraphy/comparator/postprocess.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/comparator/struct.py b/tools/Polygraphy/polygraphy/comparator/struct.py index ae66f557..0c68e98d 100644 --- a/tools/Polygraphy/polygraphy/comparator/struct.py +++ b/tools/Polygraphy/polygraphy/comparator/struct.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -37,7 +37,9 @@ def __init__(self, arr): """ self.arr = None self.tmpfile = None - if config.ARRAY_SWAP_THRESHOLD_MB >= 0 and util.array.nbytes(arr) > (config.ARRAY_SWAP_THRESHOLD_MB << 20): + if config.ARRAY_SWAP_THRESHOLD_MB >= 0 and util.array.nbytes(arr) > ( + config.ARRAY_SWAP_THRESHOLD_MB << 20 + ): self.tmpfile = util.NamedTemporaryFile(suffix=".json") G_LOGGER.extra_verbose( f"Evicting large array ({util.array.nbytes(arr) / 1024.0 ** 2:.3f} MiB) from memory and saving to {self.tmpfile.name}" @@ -57,7 +59,9 @@ def load(self): return self.arr if self.tmpfile is None: - G_LOGGER.internal_error(f"self.arr is None but self.tmpfile is also None; this should be impossible.") + G_LOGGER.internal_error( + f"self.arr is None but self.tmpfile is also None; this should be impossible." + ) return load_json(self.tmpfile.name) @@ -176,7 +180,9 @@ def encode(iter_result): @Decoder.register(IterationResult) def decode(dct): - return IterationResult(outputs=dct["outputs"], runtime=dct["runtime"], runner_name=dct["runner_name"]) + return IterationResult( + outputs=dct["outputs"], runtime=dct["runtime"], runner_name=dct["runner_name"] + ) @mod.export() @@ -352,7 +358,14 @@ def __bool__(self): Returns: bool """ - return all([bool(match) for outs in self.values() for out in outs for match in out.values()]) + return all( + [ + bool(match) + for outs in self.values() + for out in outs + for match in out.values() + ] + ) def _get_runner_pair(self, runner_pair): return util.default(runner_pair, list(self.keys())[0]) diff --git a/tools/Polygraphy/polygraphy/comparator/util.py b/tools/Polygraphy/polygraphy/comparator/util.py index 04d72c77..d8791a9b 100644 --- a/tools/Polygraphy/polygraphy/comparator/util.py +++ b/tools/Polygraphy/polygraphy/comparator/util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -167,11 +167,14 @@ def log_output_stats(output, info_hist=False, runner_name=None, hist_range=None) severity=G_LOGGER.INFO if info_hist else G_LOGGER.VERBOSE, ) G_LOGGER.log( - lambda: str_histogram(output, hist_range), severity=G_LOGGER.INFO if info_hist else G_LOGGER.VERBOSE + lambda: str_histogram(output, hist_range), + severity=G_LOGGER.INFO if info_hist else G_LOGGER.VERBOSE, ) -def build_heatmaps(arr, min_val, max_val, prefix, save_dir=None, show=None, use_lognorm=None): +def build_heatmaps( + arr, min_val, max_val, prefix, save_dir=None, show=None, use_lognorm=None +): """ Display an array as an image or set of images. The last two dimensions are interpreted as the height and width and the leading dimensions are flattened and treated as the number @@ -196,10 +199,18 @@ def build_heatmaps(arr, min_val, max_val, prefix, save_dir=None, show=None, use_ shape = util.array.shape(arr) if len(shape) < 3: - arr = util.array.view(arr, dtype=util.array.dtype(arr), shape=([1] * (3 - len(shape))) + list(shape)) + arr = util.array.view( + arr, + dtype=util.array.dtype(arr), + shape=([1] * (3 - len(shape))) + list(shape), + ) original_shape = util.array.shape(arr) - arr = util.array.view(arr, dtype=util.array.dtype(arr), shape=(-1, original_shape[-2], original_shape[-1])) + arr = util.array.view( + arr, + dtype=util.array.dtype(arr), + shape=(-1, original_shape[-2], original_shape[-1]), + ) shape = util.array.shape(arr) num_images = shape[0] @@ -226,7 +237,9 @@ def coord_str_from_img_idx(img_idx): # Populate each image in each figure. 
for fig_idx in range(num_figures): - fig, axs = plt.subplots(num_rows, num_cols, squeeze=False, dpi=200, constrained_layout=True) + fig, axs = plt.subplots( + num_rows, num_cols, squeeze=False, dpi=200, constrained_layout=True + ) base_img_idx = fig_idx * num_images_per_figure try: @@ -258,7 +271,11 @@ def coord_str_from_img_idx(img_idx): title = "Out Of Bounds" ax.set_title(title, fontsize=FONT_SIZE) - images.append(ax.imshow(img, cmap="plasma", filternorm=False, resample=False)) + images.append( + ax.imshow( + img, cmap="plasma", filternorm=False, resample=False + ) + ) for im in images: im.set_norm(norm) @@ -306,7 +323,9 @@ def scatter_plot_error_magnitude( save_dir (Optional[str]): Path to a directory in which to save images of the plots. show (Optional[bool]): Whether to display the error metrics plot. """ - G_LOGGER.start(f"Building error metrics plot for {out0_name}. This may take a while...") + G_LOGGER.start( + f"Building error metrics plot for {out0_name}. This may take a while..." + ) with G_LOGGER.indent(): title = f"Error metrics between output0 and output1\noutput0: {runner0_name:35} | {out0_name}\noutput1: {runner1_name:35} | {out1_name}" fname = f"error_metrics_{out0_name}.png" @@ -359,7 +378,9 @@ def set_log_ax(ax, min_diff, max_diff): label_suffix = " (log scale)" else: set_linear_ax(axs[1]) - axs[1].set(xlabel="output1 magnitude", ylabel=f"Relative error{label_suffix}") + axs[1].set( + xlabel="output1 magnitude", ylabel=f"Relative error{label_suffix}" + ) if save_dir is not None: path = os.path.join(save_dir, fname) diff --git a/tools/Polygraphy/polygraphy/config.py b/tools/Polygraphy/polygraphy/config.py index 5db39762..478fd407 100644 --- a/tools/Polygraphy/polygraphy/config.py +++ b/tools/Polygraphy/polygraphy/config.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,7 +17,9 @@ import os import sys -INTERNAL_CORRECTNESS_CHECKS = bool(os.environ.get("POLYGRAPHY_INTERNAL_CORRECTNESS_CHECKS", "0") != "0") +INTERNAL_CORRECTNESS_CHECKS = bool( + os.environ.get("POLYGRAPHY_INTERNAL_CORRECTNESS_CHECKS", "0") != "0" +) """ bool: Whether internal correctness checks are enabled. This can be configured by setting the 'POLYGRAPHY_INTERNAL_CORRECTNESS_CHECKS' environment variable. @@ -36,7 +38,9 @@ This can be configured by setting the 'POLYGRAPHY_ASK_BEFORE_INSTALL' environment variable. """ -INSTALL_CMD = os.environ.get("POLYGRAPHY_INSTALL_CMD", f"{sys.executable} -m pip install").split() +INSTALL_CMD = os.environ.get( + "POLYGRAPHY_INSTALL_CMD", f"{sys.executable} -m pip install" +).split() """ List[str]: The command to use to automatically install dependencies. Only relevant when AUTOINSTALL_DEPS is enabled. Defaults to ``["python", "-m", "pip", "install"]``. @@ -44,7 +48,9 @@ string containing the command; for example: ``python3 -m pip install``. """ -ARRAY_SWAP_THRESHOLD_MB = int(os.environ.get("POLYGRAPHY_ARRAY_SWAP_THRESHOLD_MB", "-1")) +ARRAY_SWAP_THRESHOLD_MB = int( + os.environ.get("POLYGRAPHY_ARRAY_SWAP_THRESHOLD_MB", "-1") +) """ int: The threshold, in megabytes, above which Polygraphy will evict an array from memory and swap it to disk. A negative value disables swapping and a value of 0 causes all arrays to be saved to disk. diff --git a/tools/Polygraphy/polygraphy/constants.py b/tools/Polygraphy/polygraphy/constants.py index a8dcc20e..694676d4 100644 --- a/tools/Polygraphy/polygraphy/constants.py +++ b/tools/Polygraphy/polygraphy/constants.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/cuda/cuda.py b/tools/Polygraphy/polygraphy/cuda/cuda.py index b7ff77be..2b6e8cd3 100644 --- a/tools/Polygraphy/polygraphy/cuda/cuda.py +++ b/tools/Polygraphy/polygraphy/cuda/cuda.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -73,12 +73,16 @@ def __init__(self): lib_pat = "libcudart.so*" fallback_lib = "libcudart.so" - cuda_paths = list(filter(lambda x: x, cuda_paths)) # Filter out empty paths (i.e. "") + cuda_paths = list( + filter(lambda x: x, cuda_paths) + ) # Filter out empty paths (i.e. "") candidates = util.find_in_dirs(lib_pat, cuda_paths) if not candidates: log_func = G_LOGGER.critical if fallback_lib is None else G_LOGGER.warning - log_func(f"Could not find the CUDA runtime library.\nNote: Paths searched were:\n{cuda_paths}") + log_func( + f"Could not find the CUDA runtime library.\nNote: Paths searched were:\n{cuda_paths}" + ) lib = fallback_lib G_LOGGER.warning(f"Attempting to load: '{lib}' using default loader paths") @@ -89,7 +93,9 @@ def __init__(self): self.handle = ctypes.CDLL(lib) if not self.handle: - G_LOGGER.critical("Could not load the CUDA runtime library. Is it on your loader path?") + G_LOGGER.critical( + "Could not load the CUDA runtime library. Is it on your loader path?" 
+ ) @func.constantmethod def check(self, status): @@ -170,9 +176,15 @@ def memcpy(self, dst, src, nbytes, kind, stream_ptr=None): """ nbytes = ctypes.c_size_t(nbytes) # Required to prevent overflow if stream_ptr is not None: - self.check(self.handle.cudaMemcpyAsync(void_ptr(dst), void_ptr(src), nbytes, kind, void_ptr(stream_ptr))) + self.check( + self.handle.cudaMemcpyAsync( + void_ptr(dst), void_ptr(src), nbytes, kind, void_ptr(stream_ptr) + ) + ) else: - self.check(self.handle.cudaMemcpy(void_ptr(dst), void_ptr(src), nbytes, kind)) + self.check( + self.handle.cudaMemcpy(void_ptr(dst), void_ptr(src), nbytes, kind) + ) G_CUDA = None @@ -294,7 +306,9 @@ def dtype(self): try: # For backwards compatibility mod.warn_deprecated( - "Using NumPy data types in DeviceView/DeviceArray", use_instead=None, remove_in="0.50.0" + "Using NumPy data types in DeviceView/DeviceArray", + use_instead=None, + remove_in="0.50.0", ) G_LOGGER.warning( f"In the future, you will need to use `DataType.from_dtype(device_view.dtype).numpy()` to retrieve the NumPy data type" @@ -360,7 +374,9 @@ def __str__(self): return f"DeviceView[(dtype={self._dtype.name}, shape={self.shape}), ptr={hex(self.ptr)}]" def __repr__(self): - return util.make_repr("DeviceView", ptr=self.ptr, shape=self.shape, dtype=self._dtype)[0] + return util.make_repr( + "DeviceView", ptr=self.ptr, shape=self.shape, dtype=self._dtype + )[0] @mod.export() @@ -375,7 +391,11 @@ def __init__(self, shape=None, dtype=None): shape (Tuple[int]): The initial shape of the buffer. dtype (DataType): The data type of the buffer. 
""" - super().__init__(ptr=0, shape=util.default(shape, tuple()), dtype=util.default(dtype, DataType.FLOAT32)) + super().__init__( + ptr=0, + shape=util.default(shape, tuple()), + dtype=util.default(dtype, DataType.FLOAT32), + ) self.allocated_nbytes = 0 self.resize(self.shape) diff --git a/tools/Polygraphy/polygraphy/datatype/datatype.py b/tools/Polygraphy/polygraphy/datatype/datatype.py index c39a3b38..22a1bb9a 100644 --- a/tools/Polygraphy/polygraphy/datatype/datatype.py +++ b/tools/Polygraphy/polygraphy/datatype/datatype.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/datatype/numpy.py b/tools/Polygraphy/polygraphy/datatype/numpy.py index 5f6fedc5..c668a407 100644 --- a/tools/Polygraphy/polygraphy/datatype/numpy.py +++ b/tools/Polygraphy/polygraphy/datatype/numpy.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,7 +16,11 @@ # from polygraphy import mod, util -from polygraphy.datatype.datatype import DataType, register_dtype_importer, register_dtype_exporter +from polygraphy.datatype.datatype import ( + DataType, + register_dtype_importer, + register_dtype_exporter, +) np = mod.lazy_import("numpy") diff --git a/tools/Polygraphy/polygraphy/datatype/onnx.py b/tools/Polygraphy/polygraphy/datatype/onnx.py index 4fc66579..ff716ad2 100644 --- a/tools/Polygraphy/polygraphy/datatype/onnx.py +++ b/tools/Polygraphy/polygraphy/datatype/onnx.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,7 +16,11 @@ # from polygraphy import mod, util -from polygraphy.datatype.datatype import DataType, register_dtype_importer, register_dtype_exporter +from polygraphy.datatype.datatype import ( + DataType, + register_dtype_importer, + register_dtype_exporter, +) onnx = mod.lazy_import("onnx") @@ -46,7 +50,11 @@ def _get_mapping(): del DATATYPE_FROM_ONNX[None] onnx_type_map = dict(onnx.TensorProto.DataType.items()) - return {onnx_type_map[key]: val for key, val in DATATYPE_FROM_ONNX.items() if key in onnx_type_map} + return { + onnx_type_map[key]: val + for key, val in DATATYPE_FROM_ONNX.items() + if key in onnx_type_map + } @register_dtype_importer("onnx") diff --git a/tools/Polygraphy/polygraphy/datatype/onnxrt.py b/tools/Polygraphy/polygraphy/datatype/onnxrt.py index 56a40129..59ff113a 100644 --- a/tools/Polygraphy/polygraphy/datatype/onnxrt.py +++ b/tools/Polygraphy/polygraphy/datatype/onnxrt.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,7 +16,11 @@ # from polygraphy import util -from polygraphy.datatype.datatype import DataType, register_dtype_importer, register_dtype_exporter +from polygraphy.datatype.datatype import ( + DataType, + register_dtype_importer, + register_dtype_exporter, +) __DATATYPE_FROM_ONNXRT = { "tensor(double)": DataType.FLOAT64, diff --git a/tools/Polygraphy/polygraphy/datatype/tensorrt.py b/tools/Polygraphy/polygraphy/datatype/tensorrt.py index ad327e96..f59f8086 100644 --- a/tools/Polygraphy/polygraphy/datatype/tensorrt.py +++ b/tools/Polygraphy/polygraphy/datatype/tensorrt.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/datatype/torch.py b/tools/Polygraphy/polygraphy/datatype/torch.py index cf217d36..e85e4008 100644 --- a/tools/Polygraphy/polygraphy/datatype/torch.py +++ b/tools/Polygraphy/polygraphy/datatype/torch.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,7 +16,11 @@ # from polygraphy import mod, util -from polygraphy.datatype.datatype import DataType, register_dtype_importer, register_dtype_exporter +from polygraphy.datatype.datatype import ( + DataType, + register_dtype_importer, + register_dtype_exporter, +) torch = mod.lazy_import("torch>=1.13.0") diff --git a/tools/Polygraphy/polygraphy/exception/exception.py b/tools/Polygraphy/polygraphy/exception/exception.py index 27ab1f42..294ab37a 100644 --- a/tools/Polygraphy/polygraphy/exception/exception.py +++ b/tools/Polygraphy/polygraphy/exception/exception.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/func/func.py b/tools/Polygraphy/polygraphy/func/func.py index dbf111ab..77b59995 100644 --- a/tools/Polygraphy/polygraphy/func/func.py +++ b/tools/Polygraphy/polygraphy/func/func.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -110,14 +110,25 @@ def extended_func(*args, **kwargs): func_params = inspect.signature(func).parameters # Special case for when the extended function does not return anything - if len(func_params) == 0 and len(extend_func_ret_tuple) == 1 and extend_func_ret_tuple[0] is None: + if ( + len(func_params) == 0 + and len(extend_func_ret_tuple) == 1 + and extend_func_ret_tuple[0] is None + ): func_retval = func() elif len(extend_func_ret_tuple) == len(func_params): func_retval = func(*extend_func_ret_tuple) - elif len(func_params) == len(extend_func_ret_tuple) + len(args) + len(kwargs): + elif len(func_params) == len(extend_func_ret_tuple) + len(args) + len( + kwargs + ): # We need to turn `extend_func_ret_tuple` into keyword arguments so that it can # be ordered after `**kwargs`. - ret_arg_names = [param.name for param in list(func_params.values())[-len(extend_func_ret_tuple) :]] + ret_arg_names = [ + param.name + for param in list(func_params.values())[ + -len(extend_func_ret_tuple) : + ] + ] ret_kwargs = dict(zip(ret_arg_names, extend_func_ret_tuple)) func_retval = func(*args, **kwargs, **ret_kwargs) else: diff --git a/tools/Polygraphy/polygraphy/json/serde.py b/tools/Polygraphy/polygraphy/json/serde.py index fcc73180..607457b8 100644 --- a/tools/Polygraphy/polygraphy/json/serde.py +++ b/tools/Polygraphy/polygraphy/json/serde.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -176,7 +176,9 @@ def __call__(self, pairs): # Handle legacy naming first - these keys should not be present in JSON generated by more recent versions of Polygraphy. 
for type_str, func in self.polygraphy_registered.items(): - if type_str in dct and dct[type_str] == constants.LEGACY_TYPE_MARKER: # Found a custom type! + if ( + type_str in dct and dct[type_str] == constants.LEGACY_TYPE_MARKER + ): # Found a custom type! return func(dct) type_name = dct.get(constants.TYPE_MARKER) @@ -245,7 +247,11 @@ def load(mode="base64"): NUMPY_REGISTRATION_SUCCESS = True global TORCH_REGISTRATION_SUCCESS - if not TORCH_REGISTRATION_SUCCESS and torch.is_installed() and torch.is_importable(): + if ( + not TORCH_REGISTRATION_SUCCESS + and torch.is_installed() + and torch.is_importable() + ): @Encoder.register(torch.Tensor) def encode(tensor): diff --git a/tools/Polygraphy/polygraphy/logger/logger.py b/tools/Polygraphy/polygraphy/logger/logger.py index a8455360..7260ee9e 100644 --- a/tools/Polygraphy/polygraphy/logger/logger.py +++ b/tools/Polygraphy/polygraphy/logger/logger.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -188,7 +188,9 @@ class Logger: CRITICAL: "light_red", } - def __init__(self, severity=INFO, colors=True, letter=True, timestamp=False, line_info=False): + def __init__( + self, severity=INFO, colors=True, letter=True, timestamp=False, line_info=False + ): """ Args: severity (Union[int, Dict[str, int]]): @@ -465,7 +467,9 @@ def backtrace(self, depth=0, limit=None, severity=ERROR): ) # Info provides 1 stack frame limit = max(limit, 0) frame = sys._getframe(depth + 2) - self.log(" ".join(traceback.format_stack(f=frame, limit=limit)), severity=severity) + self.log( + " ".join(traceback.format_stack(f=frame, limit=limit)), severity=severity + ) def ultra_verbose(self, message, mode=LogMode.EACH): """ diff --git a/tools/Polygraphy/polygraphy/mod/exporter.py b/tools/Polygraphy/polygraphy/mod/exporter.py index cc592fc1..398f9465 100644 --- a/tools/Polygraphy/polygraphy/mod/exporter.py +++ b/tools/Polygraphy/polygraphy/mod/exporter.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -105,7 +105,9 @@ def find_method(symbol, method): if method in vars(ancestor): return vars(ancestor)[method] - assert False, f"Could not find method: {method} in the inheritance hierarcy of: {symbol}" + assert ( + False + ), f"Could not find method: {method} in the inheritance hierarcy of: {symbol}" def export_impl(func_or_cls): _add_to_all(func_or_cls.__name__, module) @@ -115,13 +117,19 @@ def export_impl(func_or_cls): # have no overlapping parameters. 
from polygraphy.backend.base import BaseLoader - assert inspect.isclass(func_or_cls), "Decorated type must be a loader to use funcify=True" + assert inspect.isclass( + func_or_cls + ), "Decorated type must be a loader to use funcify=True" assert BaseLoader in inspect.getmro( func_or_cls ), "Decorated type must derive from BaseLoader to use funcify=True" def get_params(method): - return list(inspect.signature(find_method(func_or_cls, method)).parameters.values())[1:] + return list( + inspect.signature( + find_method(func_or_cls, method) + ).parameters.values() + )[1:] def is_variadic(param): return param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD] @@ -141,7 +149,9 @@ def param_names(params): init_params = get_params("__init__") call_impl_params = get_params("call_impl") - assert (set(param_names(call_impl_params)) - set(param_names(init_params))) == set( + assert ( + set(param_names(call_impl_params)) - set(param_names(init_params)) + ) == set( param_names(call_impl_params) ), "Cannot funcify a type where call_impl and __init__ have the same argument names!" 
@@ -152,14 +162,22 @@ def param_names(params): def build_arg_list(should_include): def str_from_param(p): - return get_param_name(p) + (f"={p.default}" if has_default(p) else "") + return get_param_name(p) + ( + f"={p.default}" if has_default(p) else "" + ) arg_list = [str_from_param(p) for p in init_params if should_include(p)] - arg_list += [str_from_param(p) for p in call_impl_params if should_include(p)] + arg_list += [ + str_from_param(p) for p in call_impl_params if should_include(p) + ] return arg_list - non_default_args = build_arg_list(should_include=lambda p: not is_variadic(p) and not has_default(p)) - default_args = build_arg_list(should_include=lambda p: not is_variadic(p) and has_default(p)) + non_default_args = build_arg_list( + should_include=lambda p: not is_variadic(p) and not has_default(p) + ) + default_args = build_arg_list( + should_include=lambda p: not is_variadic(p) and has_default(p) + ) special_args = build_arg_list(should_include=is_variadic) signature = ", ".join(non_default_args + default_args + special_args) @@ -168,7 +186,9 @@ def str_from_param(p): call_impl_args = ", ".join(param_names(call_impl_params)) def pascal_to_snake(name): - return "".join(f"_{c.lower()}" if c.isupper() else c for c in name).lstrip("_") + return "".join( + f"_{c.lower()}" if c.isupper() else c for c in name + ).lstrip("_") nonlocal func_name func_name = func_name or pascal_to_snake(loader.__name__) @@ -209,13 +229,19 @@ def try_add_method_doc(method): return export_impl -def warn_deprecated(name, use_instead, remove_in, module_name=None, always_show_warning=False): +def warn_deprecated( + name, use_instead, remove_in, module_name=None, always_show_warning=False +): if version(polygraphy.__version__) >= version(remove_in): - G_LOGGER.internal_error(f"{name} should have been removed in version: {remove_in}") + G_LOGGER.internal_error( + f"{name} should have been removed in version: {remove_in}" + ) full_obj_name = f"{module_name}.{name}" if module_name else 
name - msg = f"{full_obj_name} is deprecated and will be removed in Polygraphy {remove_in}." + msg = ( + f"{full_obj_name} is deprecated and will be removed in Polygraphy {remove_in}." + ) if use_instead is not None: msg += f" Use {use_instead} instead." @@ -245,8 +271,12 @@ def deprecate(remove_in, use_instead, module_name=None, name=None): """ def deprecate_impl(obj): - if config.INTERNAL_CORRECTNESS_CHECKS and version(polygraphy.__version__) >= version(remove_in): - G_LOGGER.internal_error(f"{obj} should have been removed in version: {remove_in}") + if config.INTERNAL_CORRECTNESS_CHECKS and version( + polygraphy.__version__ + ) >= version(remove_in): + G_LOGGER.internal_error( + f"{obj} should have been removed in version: {remove_in}" + ) nonlocal name name = name or obj.__name__ @@ -316,9 +346,12 @@ def export_deprecated_alias(name, remove_in, use_instead=None): module = inspect.getmodule(sys._getframe(1)) def export_deprecated_alias_impl(obj): - new_obj = deprecate(remove_in, use_instead=use_instead or obj.__name__, module_name=module.__name__, name=name)( - obj - ) + new_obj = deprecate( + remove_in, + use_instead=use_instead or obj.__name__, + module_name=module.__name__, + name=name, + )(obj) _define_in_module(name, new_obj, module) return obj diff --git a/tools/Polygraphy/polygraphy/mod/importer.py b/tools/Polygraphy/polygraphy/mod/importer.py index c686653c..bd1ac92c 100644 --- a/tools/Polygraphy/polygraphy/mod/importer.py +++ b/tools/Polygraphy/polygraphy/mod/importer.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -360,10 +360,12 @@ def import_from_script(path, name): def reset_sys_path(): del sys.path[0] + del sys.modules[modname] stack.callback(reset_sys_path) try: + importlib.invalidate_caches() mod = importlib.import_module(modname) return getattr(mod, name) except Exception as err: @@ -372,5 +374,4 @@ def reset_sys_path(): if ext != ".py": err_msg += f"\nThis could be because the extension of the file is not '.py'. Note: The extension is: {ext}" err_msg += f"\nNote: Error was: {err}" - err_msg += f"\nNote: sys.path was: {sys.path}" G_LOGGER.critical(err_msg) diff --git a/tools/Polygraphy/polygraphy/mod/util.py b/tools/Polygraphy/polygraphy/mod/util.py index c21d6780..b7529cdc 100644 --- a/tools/Polygraphy/polygraphy/mod/util.py +++ b/tools/Polygraphy/polygraphy/mod/util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/onnx/loader.py b/tools/Polygraphy/polygraphy/tools/args/backend/onnx/loader.py index a07bb457..ff6bb484 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/onnx/loader.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/onnx/loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -25,7 +25,11 @@ from polygraphy.tools.args.base import BaseArgs from polygraphy.tools.args.comparator.data_loader import DataLoaderArgs from polygraphy.tools.args.model import ModelArgs -from polygraphy.tools.script import Script, make_invocable, make_invocable_if_nondefault_kwargs +from polygraphy.tools.script import ( + Script, + make_invocable, + make_invocable_if_nondefault_kwargs, +) onnx_backend = mod.lazy_import("polygraphy.backend.onnx") onnxrt_backend = mod.lazy_import("polygraphy.backend.onnxrt") @@ -153,7 +157,9 @@ def infer_shapes(self, model, force=None): onnx.ModelProto: The model with shapes inferred. """ force = util.default(force, False) - with util.TempAttrChange(self, {"do_shape_inference": True if force else self.do_shape_inference}): + with util.TempAttrChange( + self, {"do_shape_inference": True if force else self.do_shape_inference} + ): loader = args_util.run_script(self.add_to_script, model) return util.invoke_if_callable(loader)[0] @@ -180,18 +186,26 @@ def fallback_inference(self, onnx_model, outputs=None): 2. Metadata for every tensor in the model. 
""" outputs = util.default(outputs, constants.MARK_ALL) - with G_LOGGER.verbosity(G_LOGGER.module_severity.get(G_LOGGER.module_path(__file__)) + 10): - load_model = onnx_backend.ModifyOutputs(onnx_model, outputs=outputs, copy=True) + with G_LOGGER.verbosity( + G_LOGGER.module_severity.get(G_LOGGER.module_path(__file__)) + 10 + ): + load_model = onnx_backend.ModifyOutputs( + onnx_model, outputs=outputs, copy=True + ) with onnxrt_backend.OnnxrtRunner( onnxrt_backend.SessionFromOnnx(onnx_backend.BytesFromOnnx(load_model)) ) as runner: data_loader = self.arg_groups[DataLoaderArgs].get_data_loader() loader_cache = DataLoaderCache(data_loader) - loader_cache.set_input_metadata(runner.get_input_metadata(use_numpy_dtypes=False)) + loader_cache.set_input_metadata( + runner.get_input_metadata(use_numpy_dtypes=False) + ) feed_dict = loader_cache[0] - with G_LOGGER.verbosity(G_LOGGER.module_severity.get(G_LOGGER.module_path(__file__)) - 10): + with G_LOGGER.verbosity( + G_LOGGER.module_severity.get(G_LOGGER.module_path(__file__)) - 10 + ): G_LOGGER.info( f"Running fallback shape inference using input metadata:\n{TensorMetadata.from_feed_dict(feed_dict)}" ) @@ -262,7 +276,9 @@ def __init__( def add_parser_args_impl(self): if self._output_opt: - params = ([self._output_short_opt] if self._output_short_opt else []) + [f"--{self._output_opt}"] + params = ([self._output_short_opt] if self._output_short_opt else []) + [ + f"--{self._output_opt}" + ] help_msg = "Path to save the ONNX model" if self._allow_multiple_models: help_msg = "Path to a directory in which to save ONNX model(s)" @@ -325,7 +341,9 @@ def parse_impl(self, args): external_data_path = external_data_path[0] or "" self.external_data_path = external_data_path - self.size_threshold = args_util.parse_num_bytes(args_util.get(args, "external_data_size_threshold")) + self.size_threshold = args_util.parse_num_bytes( + args_util.get(args, "external_data_size_threshold") + ) self.all_tensors_to_one_file = args_util.get(args, 
"all_tensors_to_one_file") def add_to_script_impl(self, script, loader_name): @@ -347,7 +365,9 @@ def add_to_script_impl(self, script, loader_name): # Need to run shape inference again after processing the graph since it may have changed. if self._allow_shape_inference: - loader_name = self.arg_groups[OnnxInferShapesArgs].add_to_script(script, loader_name) + loader_name = self.arg_groups[OnnxInferShapesArgs].add_to_script( + script, loader_name + ) script.add_import(imports=["SaveOnnx"], frm="polygraphy.backend.onnx") loader_name = script.add_loader( @@ -381,9 +401,9 @@ def save_onnx(self, model, path: str = None): attrs = {"path": path, "_disable_add_to_script_check": True} if self._allow_multiple_models: if self.external_data_path is not None: - attrs["external_data_path"] = os.path.basename(os.path.splitext(path)[0]) + ( - self.external_data_path or "_ext_data" - ) + attrs["external_data_path"] = os.path.basename( + os.path.splitext(path)[0] + ) + (self.external_data_path or "_ext_data") with util.TempAttrChange(self, attrs): loader = args_util.run_script(self.add_to_script, model) @@ -435,7 +455,9 @@ def __init__( self._allow_shape_inference = util.default(allow_shape_inference, True) self._outputs_opt_prefix = util.default(outputs_opt_prefix, "onnx-") self._allow_from_tf = util.default(allow_from_tf, False) - self._allow_setting_upper_bounds = util.default(allow_setting_upper_bounds, False) + self._allow_setting_upper_bounds = util.default( + allow_setting_upper_bounds, False + ) def add_parser_args_impl(self): self.group.add_argument( @@ -456,7 +478,9 @@ def add_parser_args_impl(self): default=None, ) - if self._outputs_opt_prefix is not False: # Empty strings should not disable the option + if ( + self._outputs_opt_prefix is not False + ): # Empty strings should not disable the option self.group.add_argument( f"--{self._outputs_opt_prefix}outputs", help="Name(s) of ONNX tensor(s) to mark as output(s). 
" @@ -500,7 +524,7 @@ def add_parser_args_impl(self): """, nargs="+", default=None, - dest="upper_bounds" + dest="upper_bounds", ) def parse_impl(self, args): @@ -520,9 +544,13 @@ def parse_impl(self, args): self.external_data_dir = args_util.get(args, "external_data_dir") self.ignore_external_data = args_util.get(args, "ignore_external_data") self.convert_to_fp16 = args_util.get(args, "fp_to_fp16") - self.upper_bounds = args_util.parse_arglist_to_dict(args_util.get(args, "upper_bounds")) + self.upper_bounds = args_util.parse_arglist_to_dict( + args_util.get(args, "upper_bounds") + ) - def _add_modify_onnx_outputs(self, script, loader_name, disable_custom_outputs: bool = None): + def _add_modify_onnx_outputs( + self, script, loader_name, disable_custom_outputs: bool = None + ): if disable_custom_outputs: outputs = None exclude_outputs = None @@ -531,10 +559,17 @@ def _add_modify_onnx_outputs(self, script, loader_name, disable_custom_outputs: exclude_outputs = self.exclude_outputs modify_outputs_loader = make_invocable_if_nondefault_kwargs( - "ModifyOnnxOutputs", loader_name, outputs=outputs, exclude_outputs=exclude_outputs + "ModifyOnnxOutputs", + loader_name, + outputs=outputs, + exclude_outputs=exclude_outputs, ) if modify_outputs_loader is not None: - script.add_import(imports="ModifyOutputs", frm="polygraphy.backend.onnx", imp_as="ModifyOnnxOutputs") + script.add_import( + imports="ModifyOutputs", + frm="polygraphy.backend.onnx", + imp_as="ModifyOnnxOutputs", + ) loader_name = script.add_loader( modify_outputs_loader, "modify_outputs", @@ -542,7 +577,9 @@ def _add_modify_onnx_outputs(self, script, loader_name, disable_custom_outputs: return loader_name - def add_to_script_impl(self, script, disable_custom_outputs: bool = None, serialize_model: bool = None): + def add_to_script_impl( + self, script, disable_custom_outputs: bool = None, serialize_model: bool = None + ): """ Args: disable_custom_outputs (bool): @@ -559,10 +596,16 @@ def add_to_script_impl(self, 
script, disable_custom_outputs: bool = None, serial if model_type.is_onnx(): loader_name = self.arg_groups[ModelArgs].path if self._allow_shape_inference: - loader_name = self.arg_groups[OnnxInferShapesArgs].add_to_script(script, loader_name) + loader_name = self.arg_groups[OnnxInferShapesArgs].add_to_script( + script, loader_name + ) - if loader_name == self.arg_groups[ModelArgs].path: # Shape inference loader isn't being used, have to load. - script.add_import(imports=["OnnxFromPath"], frm="polygraphy.backend.onnx") + if ( + loader_name == self.arg_groups[ModelArgs].path + ): # Shape inference loader isn't being used, have to load. + script.add_import( + imports=["OnnxFromPath"], frm="polygraphy.backend.onnx" + ) loader_str = make_invocable( "OnnxFromPath", self.arg_groups[ModelArgs].path, @@ -575,24 +618,39 @@ def add_to_script_impl(self, script, disable_custom_outputs: bool = None, serial loader_name = self.arg_groups[OnnxFromTfArgs].add_to_script(script) else: - G_LOGGER.critical(f"Model type: {model_type} could not be converted to an ONNX model.") + G_LOGGER.critical( + f"Model type: {model_type} could not be converted to an ONNX model." 
+ ) - loader_name = self._add_modify_onnx_outputs(script, loader_name, disable_custom_outputs=disable_custom_outputs) + loader_name = self._add_modify_onnx_outputs( + script, loader_name, disable_custom_outputs=disable_custom_outputs + ) if self.convert_to_fp16: script.add_import(imports=["ConvertToFp16"], frm="polygraphy.backend.onnx") - loader_name = script.add_loader(make_invocable("ConvertToFp16", loader_name), "convert_to_fp16") + loader_name = script.add_loader( + make_invocable("ConvertToFp16", loader_name), "convert_to_fp16" + ) if self._allow_saving: - loader_name = self.arg_groups[OnnxSaveArgs].add_to_script(script, loader_name) + loader_name = self.arg_groups[OnnxSaveArgs].add_to_script( + script, loader_name + ) if serialize_model: script.add_import(imports=["BytesFromOnnx"], frm="polygraphy.backend.onnx") - loader_name = script.add_loader(make_invocable("BytesFromOnnx", loader_name), "serialize_onnx") + loader_name = script.add_loader( + make_invocable("BytesFromOnnx", loader_name), "serialize_onnx" + ) if self._allow_setting_upper_bounds and self.upper_bounds is not None: script.add_import(imports=["SetUpperBound"], frm="polygraphy.backend.onnx") - loader_name = script.add_loader(make_invocable("SetUpperBound", loader_name, upper_bounds=self.upper_bounds), "set_upper_bound") + loader_name = script.add_loader( + make_invocable( + "SetUpperBound", loader_name, upper_bounds=self.upper_bounds + ), + "set_upper_bound", + ) return loader_name @@ -610,11 +668,23 @@ def must_use_onnx_loader(self, disable_custom_outputs: bool = None): """ tmp_script = Script() inp_loader = "check_needs_modify" - needs_modify = self._add_modify_onnx_outputs(tmp_script, inp_loader, disable_custom_outputs) != inp_loader - needs_shape_inference = self._allow_shape_inference and self.arg_groups[OnnxInferShapesArgs].do_shape_inference - needs_save = self._allow_saving and self.arg_groups[OnnxSaveArgs].path is not None + needs_modify = ( + self._add_modify_onnx_outputs( + tmp_script, 
inp_loader, disable_custom_outputs + ) + != inp_loader + ) + needs_shape_inference = ( + self._allow_shape_inference + and self.arg_groups[OnnxInferShapesArgs].do_shape_inference + ) + needs_save = ( + self._allow_saving and self.arg_groups[OnnxSaveArgs].path is not None + ) needs_fp16_conversion = self.convert_to_fp16 - needs_setting_upper_bounds = self._allow_setting_upper_bounds and self.upper_bounds is not None + needs_setting_upper_bounds = ( + self._allow_setting_upper_bounds and self.upper_bounds is not None + ) # Currently, other loaders do not support external data, so we must fall back to the ONNX loader if it's present. return ( not self.arg_groups[ModelArgs].model_type.is_onnx() @@ -648,7 +718,12 @@ class OnnxFromTfArgs(BaseArgs): """ def add_parser_args_impl(self): - self.group.add_argument("--opset", help="Opset to use when converting to ONNX", default=None, type=int) + self.group.add_argument( + "--opset", + help="Opset to use when converting to ONNX", + default=None, + type=int, + ) def parse_impl(self, args): """ @@ -670,7 +745,9 @@ def add_to_script_impl(self, script): script.add_import(imports=["OnnxFromTfGraph"], frm="polygraphy.backend.onnx") loader_str = make_invocable( "OnnxFromTfGraph", - self.arg_groups[TfLoadArgs].add_to_script(script, disable_custom_outputs=True), + self.arg_groups[TfLoadArgs].add_to_script( + script, disable_custom_outputs=True + ), opset=self.opset, ) loader_name = script.add_loader(loader_str, "export_onnx_from_tf") diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/onnxrt/loader.py b/tools/Polygraphy/polygraphy/tools/args/backend/onnxrt/loader.py index 1cbeb665..8598bcee 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/onnxrt/loader.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/onnxrt/loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -56,15 +56,18 @@ def parse_impl(self, args): self.providers = args_util.get(args, "providers") def add_to_script_impl(self, script, onnx_name=None): - if onnx_name is None: # default behavior according to self.arg_groups + if onnx_name is None: # default behavior according to self.arg_groups if self.arg_groups[OnnxLoadArgs].must_use_onnx_loader(): - onnx_name = self.arg_groups[OnnxLoadArgs].add_to_script(script, serialize_model=True) + onnx_name = self.arg_groups[OnnxLoadArgs].add_to_script( + script, serialize_model=True + ) else: onnx_name = self.arg_groups[ModelArgs].path script.add_import(imports=["SessionFromOnnx"], frm="polygraphy.backend.onnxrt") loader_name = script.add_loader( - make_invocable("SessionFromOnnx", onnx_name, providers=self.providers), "build_onnxrt_session" + make_invocable("SessionFromOnnx", onnx_name, providers=self.providers), + "build_onnxrt_session", ) return loader_name diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/onnxrt/runner.py b/tools/Polygraphy/polygraphy/tools/args/backend/onnxrt/runner.py index fd9f93f6..f5c051ab 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/onnxrt/runner.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/onnxrt/runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -35,4 +35,8 @@ def get_name_opt_impl(self): def add_to_script_impl(self, script): script.add_import(imports=["OnnxrtRunner"], frm="polygraphy.backend.onnxrt") - script.add_runner(make_invocable("OnnxrtRunner", self.arg_groups[OnnxrtSessionArgs].add_to_script(script))) + script.add_runner( + make_invocable( + "OnnxrtRunner", self.arg_groups[OnnxrtSessionArgs].add_to_script(script) + ) + ) diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/pluginref/runner.py b/tools/Polygraphy/polygraphy/tools/args/backend/pluginref/runner.py index 2d5b5078..9438d9e9 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/pluginref/runner.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/pluginref/runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -36,8 +36,12 @@ def get_name_opt_impl(self): def add_to_script_impl(self, script): script.add_import(imports=["GsFromOnnx"], frm="polygraphy.backend.onnx") - script.add_import(imports=["PluginRefRunner"], frm="polygraphy.backend.pluginref") + script.add_import( + imports=["PluginRefRunner"], frm="polygraphy.backend.pluginref" + ) onnx_name = self.arg_groups[OnnxLoadArgs].add_to_script(script) - loader_name = script.add_loader(make_invocable("GsFromOnnx", onnx_name), "pluginref") + loader_name = script.add_loader( + make_invocable("GsFromOnnx", onnx_name), "pluginref" + ) script.add_runner(make_invocable("PluginRefRunner", loader_name)) diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/runner_select.py b/tools/Polygraphy/polygraphy/tools/args/backend/runner_select.py index 8d3fba18..cb19ba2f 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/runner_select.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/runner_select.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -98,7 +98,9 @@ def add_to_script_impl(self, script): str: The name of the list of runners in the script. """ if not self.runners: - G_LOGGER.warning("No runners have been selected. Inference will not be run!") + G_LOGGER.warning( + "No runners have been selected. Inference will not be run!" 
+ ) for opt in self.runners.keys(): self._opt_to_group_map[opt].add_to_script(script) diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/tf/config.py b/tools/Polygraphy/polygraphy/tools/args/backend/tf/config.py index cfee0a8d..859c3f1f 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/tf/config.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/tf/config.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -35,10 +35,16 @@ def add_parser_args_impl(self): default=None, ) self.group.add_argument( - "--allow-growth", help="Allow GPU memory allocated by TensorFlow to grow", action="store_true", default=None + "--allow-growth", + help="Allow GPU memory allocated by TensorFlow to grow", + action="store_true", + default=None, ) self.group.add_argument( - "--xla", help="[EXPERIMENTAL] Attempt to run graph with xla", action="store_true", default=None + "--xla", + help="[EXPERIMENTAL] Attempt to run graph with xla", + action="store_true", + default=None, ) def parse_impl(self, args): @@ -63,7 +69,9 @@ def add_to_script_impl(self, script): ) if config_loader_str is not None: script.add_import(imports=["CreateConfig"], frm="polygraphy.backend.tf") - config_loader_name = script.add_loader(config_loader_str, "create_tf_config") + config_loader_name = script.add_loader( + config_loader_str, "create_tf_config" + ) else: config_loader_name = None return config_loader_name diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/tf/loader.py b/tools/Polygraphy/polygraphy/tools/args/backend/tf/loader.py index 67213cea..1ff44e8c 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/tf/loader.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/tf/loader.py @@ -1,5 
+1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -101,7 +101,12 @@ class TfLoadArgs(BaseArgs): - TrtSaveEngineBytesArgs: if allow_tftrt == True """ - def __init__(self, allow_artifacts: bool = None, allow_custom_outputs: bool = None, allow_tftrt: bool = None): + def __init__( + self, + allow_artifacts: bool = None, + allow_custom_outputs: bool = None, + allow_tftrt: bool = None, + ): """ Args: allow_artifacts (bool): @@ -151,7 +156,10 @@ def add_parser_args_impl(self): ) self.group.add_argument( - "--freeze-graph", help="[EXPERIMENTAL] Attempt to freeze the graph", action="store_true", default=None + "--freeze-graph", + help="[EXPERIMENTAL] Attempt to freeze the graph", + action="store_true", + default=None, ) def parse_impl(self, args): @@ -203,13 +211,17 @@ def add_to_script_impl(self, script, disable_custom_outputs=None): loader_id = "load_frozen" loader_str = make_invocable("GraphFromFrozen", model_file) else: - G_LOGGER.critical(f"Model type: {model_type} cannot be imported with TensorFlow.") + G_LOGGER.critical( + f"Model type: {model_type} cannot be imported with TensorFlow." 
+ ) loader_name = script.add_loader(loader_str, loader_id) if self.freeze_graph: script.add_import(imports=["OptimizeGraph"], frm="polygraphy.backend.tf") - loader_name = script.add_loader(make_invocable("OptimizeGraph", loader_name), "optimize_graph") + loader_name = script.add_loader( + make_invocable("OptimizeGraph", loader_name), "optimize_graph" + ) engine_dir = None if self._allow_tftrt: @@ -219,7 +231,11 @@ def add_to_script_impl(self, script, disable_custom_outputs=None): engine_dir = self.arg_groups[TrtSaveEngineBytesArgs].path MODIFY_TF = "ModifyGraphOutputs" - outputs = None if disable_custom_outputs else args_util.get_outputs_for_script(script, self.outputs) + outputs = ( + None + if disable_custom_outputs + else args_util.get_outputs_for_script(script, self.outputs) + ) modify_tf_str = make_invocable(MODIFY_TF, loader_name, outputs=outputs) if modify_tf_str != make_invocable(MODIFY_TF, loader_name): script.add_import(imports=[MODIFY_TF], frm="polygraphy.backend.tf") diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/tf/runner.py b/tools/Polygraphy/polygraphy/tools/args/backend/tf/runner.py index 1c9ea197..4e46114f 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/tf/runner.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/tf/runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -61,7 +61,10 @@ def add_to_script_impl(self, script): script.add_import(imports=["SessionFromGraph"], frm="polygraphy.backend.tf") loader_name = script.add_loader( - make_invocable("SessionFromGraph", graph_name, config=config_name), "build_tf_session" + make_invocable("SessionFromGraph", graph_name, config=config_name), + "build_tf_session", ) - script.add_runner(make_invocable("TfRunner", loader_name, timeline_path=self.timeline_path)) + script.add_runner( + make_invocable("TfRunner", loader_name, timeline_path=self.timeline_path) + ) diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/trt/config.py b/tools/Polygraphy/polygraphy/tools/args/backend/trt/config.py index 51f287e3..10aea683 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/trt/config.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/trt/config.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -25,7 +25,13 @@ from polygraphy.tools.args.base import BaseArgs from polygraphy.tools.args.comparator.data_loader import DataLoaderArgs from polygraphy.tools.args.model import ModelArgs -from polygraphy.tools.script import inline, inline_identifier, make_invocable, make_invocable_if_nondefault, safe +from polygraphy.tools.script import ( + inline, + inline_identifier, + make_invocable, + make_invocable_if_nondefault, + safe, +) def parse_profile_shapes(default_shapes, min_args, opt_args, max_args): @@ -49,7 +55,10 @@ def get_shapes(lst, idx): default_shapes.update(args_util.parse_meta(lst[idx], includes_dtype=False)) # Don't care about dtype, and need to override dynamic dimensions - shapes = {name: util.override_dynamic_shape(shape) for name, (_, shape) in default_shapes.items()} + shapes = { + name: util.override_dynamic_shape(shape) + for name, (_, shape) in default_shapes.items() + } for name, shape in shapes.items(): if tuple(default_shapes[name].shape) != tuple(shape): @@ -80,7 +89,10 @@ def get_shapes(lst, idx): f"Mismatch in input names between optimum shapes ({list(opt_shapes.keys())}) and maximum shapes ({list(max_shapes.keys())})" ) - profile = {name: (min_shapes[name], opt_shapes[name], max_shapes[name]) for name in min_shapes.keys()} + profile = { + name: (min_shapes[name], opt_shapes[name], max_shapes[name]) + for name in min_shapes.keys() + } profiles.append(profile) return profiles @@ -123,8 +135,12 @@ def __init__( Defaults to False. 
""" super().__init__() - self._precision_constraints_default = util.default(precision_constraints_default, "none") - self._allow_random_data_calib_warning = util.default(allow_random_data_calib_warning, True) + self._precision_constraints_default = util.default( + precision_constraints_default, "none" + ) + self._allow_random_data_calib_warning = util.default( + allow_random_data_calib_warning, True + ) self._allow_custom_input_shapes = util.default(allow_custom_input_shapes, True) self._allow_engine_capability = util.default(allow_engine_capability, False) self._allow_tensor_formats = util.default(allow_tensor_formats, False) @@ -158,10 +174,30 @@ def add_parser_args_impl(self): default=[], ) - self.group.add_argument("--tf32", help="Enable tf32 precision in TensorRT", action="store_true", default=None) - self.group.add_argument("--fp16", help="Enable fp16 precision in TensorRT", action="store_true", default=None) - self.group.add_argument("--bf16", help="Enable bf16 precision in TensorRT", action="store_true", default=None) - self.group.add_argument("--fp8", help="Enable fp8 precision in TensorRT", action="store_true", default=None) + self.group.add_argument( + "--tf32", + help="Enable tf32 precision in TensorRT", + action="store_true", + default=None, + ) + self.group.add_argument( + "--fp16", + help="Enable fp16 precision in TensorRT", + action="store_true", + default=None, + ) + self.group.add_argument( + "--bf16", + help="Enable bf16 precision in TensorRT", + action="store_true", + default=None, + ) + self.group.add_argument( + "--fp8", + help="Enable fp8 precision in TensorRT", + action="store_true", + default=None, + ) self.group.add_argument( "--int8", help="Enable int8 precision in TensorRT. " @@ -413,7 +449,7 @@ def add_parser_args_impl(self): "--weight-streaming", help="Build a weight streamable engine. Must be set with --strongly-typed. 
The weight streaming amount can be set with --weight-streaming-budget.", action="store_true", - default=None + default=None, ) if self._allow_engine_capability: @@ -485,10 +521,14 @@ def parse_impl(self, args): default_shapes = TensorMetadata() if self._allow_custom_input_shapes: if not hasattr(self.arg_groups[ModelArgs], "input_shapes"): - G_LOGGER.internal_error("ModelArgs must be parsed before TrtConfigArgs!") + G_LOGGER.internal_error( + "ModelArgs must be parsed before TrtConfigArgs!" + ) default_shapes = self.arg_groups[ModelArgs].input_shapes - self.profile_dicts = parse_profile_shapes(default_shapes, trt_min_shapes, trt_opt_shapes, trt_max_shapes) + self.profile_dicts = parse_profile_shapes( + default_shapes, trt_min_shapes, trt_opt_shapes, trt_max_shapes + ) self.tf32 = args_util.get(args, "tf32") self.fp16 = args_util.get(args, "fp16") @@ -508,7 +548,9 @@ def parse_impl(self, args): calib_base = args_util.get(args, "calibration_base_class") self.calibration_base_class = None if calib_base is not None: - self.calibration_base_class = inline(safe("trt.{:}", inline_identifier(calib_base))) + self.calibration_base_class = inline( + safe("trt.{:}", inline_identifier(calib_base)) + ) self._quantile = args_util.get(args, "quantile") self._regression_cutoff = args_util.get(args, "regression_cutoff") @@ -523,22 +565,31 @@ def parse_impl(self, args): tactic_sources = args_util.get(args, "tactic_sources") self.tactic_sources = None if tactic_sources is not None: - self.tactic_sources = [make_trt_enum_val("TacticSource", source) for source in tactic_sources] + self.tactic_sources = [ + make_trt_enum_val("TacticSource", source) for source in tactic_sources + ] - self.trt_config_script, self.trt_config_func_name = args_util.parse_script_and_func_name( - args_util.get(args, "trt_config_script"), default_func_name="load_config" + self.trt_config_script, self.trt_config_func_name = ( + args_util.parse_script_and_func_name( + args_util.get(args, "trt_config_script"), + 
default_func_name="load_config", + ) ) ( self.trt_config_postprocess_script, self.trt_config_postprocess_func_name, ) = args_util.parse_script_and_func_name( - args_util.get(args, "trt_config_postprocess_script"), default_func_name="postprocess_config" + args_util.get(args, "trt_config_postprocess_script"), + default_func_name="postprocess_config", ) func_name = args_util.get(args, "trt_config_func_name") if func_name is not None: mod.warn_deprecated( - "--trt-config-func-name", "the config script argument", "0.50.0", always_show_warning=True + "--trt-config-func-name", + "the config script argument", + "0.50.0", + always_show_warning=True, ) self.trt_config_func_name = func_name @@ -546,7 +597,9 @@ def parse_impl(self, args): self.allow_gpu_fallback = args_util.get(args, "allow_gpu_fallback") memory_pool_limits = args_util.parse_arglist_to_dict( - args_util.get(args, "memory_pool_limit"), cast_to=args_util.parse_num_bytes, allow_empty_key=False + args_util.get(args, "memory_pool_limit"), + cast_to=args_util.parse_num_bytes, + allow_empty_key=False, ) self.memory_pool_limits = None if memory_pool_limits is not None: @@ -558,18 +611,27 @@ def parse_impl(self, args): preview_features = args_util.get(args, "preview_features") self.preview_features = None if preview_features is not None: - self.preview_features = [make_trt_enum_val("PreviewFeature", feature) for feature in preview_features] + self.preview_features = [ + make_trt_enum_val("PreviewFeature", feature) + for feature in preview_features + ] engine_capability = args_util.get(args, "engine_capability") self.engine_capability = None if engine_capability is not None: - self.engine_capability = make_trt_enum_val("EngineCapability", engine_capability) + self.engine_capability = make_trt_enum_val( + "EngineCapability", engine_capability + ) self.direct_io = args_util.get(args, "direct_io") - self.builder_optimization_level = args_util.get(args, "builder_optimization_level") + self.builder_optimization_level = 
args_util.get( + args, "builder_optimization_level" + ) self.hardware_compatibility_level = None - hardware_compatibility_level = args_util.get(args, "hardware_compatibility_level") + hardware_compatibility_level = args_util.get( + args, "hardware_compatibility_level" + ) if hardware_compatibility_level is not None: self.hardware_compatibility_level = make_trt_enum_val( "HardwareCompatibilityLevel", hardware_compatibility_level @@ -589,14 +651,23 @@ def parse_impl(self, args): quantization_flags = args_util.get(args, "quantization_flags") self.quantization_flags = None if quantization_flags is not None: - self.quantization_flags = [make_trt_enum_val("QuantizationFlag", flag) for flag in quantization_flags] + self.quantization_flags = [ + make_trt_enum_val("QuantizationFlag", flag) + for flag in quantization_flags + ] if self.exclude_lean_runtime and not self.version_compatible: - G_LOGGER.critical(f"`--exclude-lean-runtime` requires `--version-compatible` to be enabled.") + G_LOGGER.critical( + f"`--exclude-lean-runtime` requires `--version-compatible` to be enabled." 
+ ) - self.error_on_timing_cache_miss = args_util.get(args, "error_on_timing_cache_miss") + self.error_on_timing_cache_miss = args_util.get( + args, "error_on_timing_cache_miss" + ) - self.disable_compilation_cache = args_util.get(args, "disable_compilation_cache") + self.disable_compilation_cache = args_util.get( + args, "disable_compilation_cache" + ) self.weight_streaming = args_util.get(args, "weight_streaming") @@ -605,19 +676,29 @@ def add_to_script_impl(self, script): for profile_dict in self.profile_dicts: profile_str = "Profile()" for name in profile_dict.keys(): - profile_str += safe(".add({:}, min={:}, opt={:}, max={:})", name, *profile_dict[name]).unwrap() + profile_str += safe( + ".add({:}, min={:}, opt={:}, max={:})", name, *profile_dict[name] + ).unwrap() profiles.append(profile_str) if profiles: script.add_import(imports=["Profile"], frm="polygraphy.backend.trt") profiles = safe( - "[\n{tab}{:}\n]", inline(safe(f",\n{constants.TAB}".join(profiles))), tab=inline(safe(constants.TAB)) + "[\n{tab}{:}\n]", + inline(safe(f",\n{constants.TAB}".join(profiles))), + tab=inline(safe(constants.TAB)), ) profile_name = script.add_loader(profiles, "profiles") else: profile_name = None calibrator = None - if any(arg is not None for arg in [self.calibration_cache, self.calibration_base_class]) and not self.int8: + if ( + any( + arg is not None + for arg in [self.calibration_cache, self.calibration_base_class] + ) + and not self.int8 + ): G_LOGGER.warning( "Some int8 calibrator options were set, but int8 precision is not enabled. " "Calibration options will be ignored. Please set --int8 to enable calibration. 
" @@ -632,7 +713,10 @@ def add_to_script_impl(self, script): if ( self.arg_groups[DataLoaderArgs].is_using_random_data() - and (not self.calibration_cache or not os.path.exists(self.calibration_cache)) + and ( + not self.calibration_cache + or not os.path.exists(self.calibration_cache) + ) and self._allow_random_data_calib_warning ): G_LOGGER.warning( @@ -644,7 +728,11 @@ def add_to_script_impl(self, script): calibrator = make_invocable( "Calibrator", - data_loader=data_loader_name if data_loader_name else inline(safe("DataLoader()")), + data_loader=( + data_loader_name + if data_loader_name + else inline(safe("DataLoader()")) + ), cache=self.calibration_cache, BaseClass=self.calibration_base_class, quantile=self._quantile, @@ -675,9 +763,13 @@ def add_to_script_impl(self, script): script.add_import(imports="tensorrt", imp_as="trt") if self.trt_config_script is not None: - script.add_import(imports=["InvokeFromScript"], frm="polygraphy.backend.common") + script.add_import( + imports=["InvokeFromScript"], frm="polygraphy.backend.common" + ) config_loader_str = make_invocable( - "InvokeFromScript", self.trt_config_script, name=self.trt_config_func_name + "InvokeFromScript", + self.trt_config_script, + name=self.trt_config_func_name, ) else: config_loader_str = make_invocable_if_nondefault( @@ -715,28 +807,47 @@ def add_to_script_impl(self, script): weight_streaming=self.weight_streaming, ) if config_loader_str is not None: - script.add_import(imports="CreateConfig", frm="polygraphy.backend.trt", imp_as="CreateTrtConfig") + script.add_import( + imports="CreateConfig", + frm="polygraphy.backend.trt", + imp_as="CreateTrtConfig", + ) if config_loader_str is not None: - config_loader_name = script.add_loader(config_loader_str, "create_trt_config") + config_loader_name = script.add_loader( + config_loader_str, "create_trt_config" + ) else: config_loader_name = None if self.trt_config_postprocess_script is not None: # Need to set up a default config if there isn't one since 
`PostprocessConfig` will require a config. if config_loader_name is None: - script.add_import(imports="CreateConfig", frm="polygraphy.backend.trt", imp_as="CreateTrtConfig") - config_loader_name = script.add_loader(make_invocable("CreateTrtConfig"), "create_trt_config") + script.add_import( + imports="CreateConfig", + frm="polygraphy.backend.trt", + imp_as="CreateTrtConfig", + ) + config_loader_name = script.add_loader( + make_invocable("CreateTrtConfig"), "create_trt_config" + ) - script.add_import(imports=["InvokeFromScript"], frm="polygraphy.backend.common") script.add_import( - imports=["PostprocessConfig"], frm="polygraphy.backend.trt", imp_as="PostprocessTrtConfig" + imports=["InvokeFromScript"], frm="polygraphy.backend.common" + ) + script.add_import( + imports=["PostprocessConfig"], + frm="polygraphy.backend.trt", + imp_as="PostprocessTrtConfig", ) func = make_invocable( - "InvokeFromScript", self.trt_config_postprocess_script, name=self.trt_config_postprocess_func_name + "InvokeFromScript", + self.trt_config_postprocess_script, + name=self.trt_config_postprocess_func_name, ) config_loader_name = script.add_loader( - make_invocable("PostprocessTrtConfig", config_loader_name, func=func), "postprocess_trt_config" + make_invocable("PostprocessTrtConfig", config_loader_name, func=func), + "postprocess_trt_config", ) return config_loader_name diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/trt/loader.py b/tools/Polygraphy/polygraphy/tools/args/backend/trt/loader.py index 7c0f88f4..21763062 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/trt/loader.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/trt/loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,7 +26,13 @@ from polygraphy.tools.args.backend.trt.helper import make_trt_enum_val from polygraphy.tools.args.base import BaseArgs from polygraphy.tools.args.model import ModelArgs -from polygraphy.tools.script import inline, inline_identifier, make_invocable, make_invocable_if_nondefault_kwargs, safe +from polygraphy.tools.script import ( + inline, + inline_identifier, + make_invocable, + make_invocable_if_nondefault_kwargs, + safe, +) @mod.export() @@ -36,7 +42,12 @@ class TrtLoadPluginsArgs(BaseArgs): """ def add_parser_args_impl(self): - self.group.add_argument("--plugins", help="Path(s) of plugin libraries to load", nargs="+", default=None) + self.group.add_argument( + "--plugins", + help="Path(s) of plugin libraries to load", + nargs="+", + default=None, + ) def parse_impl(self, args): """ @@ -56,7 +67,9 @@ def add_to_script_impl(self, script, loader_name: str): """ if self.plugins: script.add_import(imports=["LoadPlugins"], frm="polygraphy.backend.trt") - loader_str = make_invocable("LoadPlugins", plugins=self.plugins, obj=loader_name) + loader_str = make_invocable( + "LoadPlugins", plugins=self.plugins, obj=loader_name + ) loader_name = script.add_loader(loader_str, "load_plugins") return loader_name @@ -97,7 +110,9 @@ def parse_impl(self, args): flags (List[str]): flags for onnxparser """ self._flags = args_util.get(args, "onnx_flags", default=[]) - self._plugin_instancenorm = args_util.get(args, "plugin_instancenorm", default=None) + self._plugin_instancenorm = args_util.get( + args, "plugin_instancenorm", default=None + ) def get_flags(self): """ @@ -119,7 +134,10 @@ def get_flags(self): ) flags.append("native_instancenorm") - return ([make_trt_enum_val("OnnxParserFlag", f) for f in flags] or None, self._plugin_instancenorm) + return ( + [make_trt_enum_val("OnnxParserFlag", f) for f in flags] or None, + self._plugin_instancenorm, + ) 
@mod.export() @@ -136,7 +154,10 @@ class TrtLoadNetworkArgs(BaseArgs): """ def __init__( - self, allow_custom_outputs: bool = None, allow_onnx_loading: bool = None, allow_tensor_formats: bool = None + self, + allow_custom_outputs: bool = None, + allow_onnx_loading: bool = None, + allow_tensor_formats: bool = None, ): """ Args: @@ -266,26 +287,38 @@ def parse_impl(self, args): self.layer_precisions = None if layer_precisions is not None: self.layer_precisions = { - name: inline(safe("trt.{}", inline_identifier(value))) for name, value in layer_precisions.items() + name: inline(safe("trt.{}", inline_identifier(value))) + for name, value in layer_precisions.items() } - tensor_datatypes = args_util.parse_arglist_to_dict(args_util.get(args, "tensor_dtypes"), allow_empty_key=False) + tensor_datatypes = args_util.parse_arglist_to_dict( + args_util.get(args, "tensor_dtypes"), allow_empty_key=False + ) self.tensor_datatypes = None if tensor_datatypes is not None: self.tensor_datatypes = { - name: inline(safe("trt.{}", inline_identifier(value))) for name, value in tensor_datatypes.items() + name: inline(safe("trt.{}", inline_identifier(value))) + for name, value in tensor_datatypes.items() } - tensor_formats = args_util.parse_arglist_to_dict(args_util.get(args, "tensor_formats"), allow_empty_key=False) + tensor_formats = args_util.parse_arglist_to_dict( + args_util.get(args, "tensor_formats"), allow_empty_key=False + ) self.tensor_formats = None if tensor_formats is not None: self.tensor_formats = { - name: [inline(safe("trt.TensorFormat.{}", inline_identifier(value.upper()))) for value in values] + name: [ + inline( + safe("trt.TensorFormat.{}", inline_identifier(value.upper())) + ) + for value in values + ] for name, values in tensor_formats.items() } pps = args_util.parse_arglist_to_tuple_list( - args_util.get(args, "trt_network_postprocess_script"), treat_missing_sep_as_val=False + args_util.get(args, "trt_network_postprocess_script"), + treat_missing_sep_as_val=False, ) 
if pps is None: pps = [] @@ -299,13 +332,18 @@ def parse_impl(self, args): self.postprocess_scripts.append((script_path, func)) self.strongly_typed = args_util.get(args, "strongly_typed") - + self.mark_debug = args_util.get(args, "mark_debug") def add_to_script_impl(self, script): network_func_name = self.arg_groups[ModelArgs].extra_model_info if self.trt_network_func_name is not None: - mod.warn_deprecated("--trt-network-func-name", "the model argument", "0.50.0", always_show_warning=True) + mod.warn_deprecated( + "--trt-network-func-name", + "the model argument", + "0.50.0", + always_show_warning=True, + ) network_func_name = self.trt_network_func_name model_file = self.arg_groups[ModelArgs].path @@ -314,12 +352,21 @@ def add_to_script_impl(self, script): parser_flags, plugin_instancenorm = self.arg_groups[TrtOnnxFlagArgs].get_flags() if any( - arg is not None for arg in [self.layer_precisions, self.tensor_datatypes, self.tensor_formats, parser_flags, plugin_instancenorm] + arg is not None + for arg in [ + self.layer_precisions, + self.tensor_datatypes, + self.tensor_formats, + parser_flags, + plugin_instancenorm, + ] ): script.add_import(imports="tensorrt", imp_as="trt") if model_type == "trt-network-script": - script.add_import(imports=["InvokeFromScript"], frm="polygraphy.backend.common") + script.add_import( + imports=["InvokeFromScript"], frm="polygraphy.backend.common" + ) loader_str = make_invocable( "InvokeFromScript", model_file, @@ -327,56 +374,82 @@ def add_to_script_impl(self, script): ) loader_name = script.add_loader(loader_str, "load_network") elif self._allow_onnx_loading: - if self.arg_groups[OnnxLoadArgs].must_use_onnx_loader(disable_custom_outputs=True): + if self.arg_groups[OnnxLoadArgs].must_use_onnx_loader( + disable_custom_outputs=True + ): # When loading from ONNX, we need to disable custom outputs since TRT requires dtypes on outputs, # which our marking function doesn't guarantee. 
- script.add_import(imports=["NetworkFromOnnxBytes"], frm="polygraphy.backend.trt") + script.add_import( + imports=["NetworkFromOnnxBytes"], frm="polygraphy.backend.trt" + ) onnx_loader = self.arg_groups[OnnxLoadArgs].add_to_script( script, disable_custom_outputs=True, serialize_model=True ) loader_str = make_invocable( "NetworkFromOnnxBytes", - self.arg_groups[TrtLoadPluginsArgs].add_to_script(script, onnx_loader), + self.arg_groups[TrtLoadPluginsArgs].add_to_script( + script, onnx_loader + ), flags=parser_flags, plugin_instancenorm=plugin_instancenorm, strongly_typed=self.strongly_typed, ) loader_name = script.add_loader(loader_str, "parse_network_from_onnx") else: - script.add_import(imports=["NetworkFromOnnxPath"], frm="polygraphy.backend.trt") + script.add_import( + imports=["NetworkFromOnnxPath"], frm="polygraphy.backend.trt" + ) loader_str = make_invocable( "NetworkFromOnnxPath", - self.arg_groups[TrtLoadPluginsArgs].add_to_script(script, model_file), + self.arg_groups[TrtLoadPluginsArgs].add_to_script( + script, model_file + ), flags=parser_flags, plugin_instancenorm=plugin_instancenorm, strongly_typed=self.strongly_typed, ) loader_name = script.add_loader(loader_str, "parse_network_from_onnx") else: - G_LOGGER.internal_error("Loading from ONNX is not enabled and a network script was not provided!") + G_LOGGER.internal_error( + "Loading from ONNX is not enabled and a network script was not provided!" 
+ ) def add_loader_if_nondefault(loader, result_var_name, **kwargs): - loader_str = make_invocable_if_nondefault_kwargs(loader, loader_name, **kwargs) + loader_str = make_invocable_if_nondefault_kwargs( + loader, loader_name, **kwargs + ) if loader_str is not None: script.add_import(imports=[loader], frm="polygraphy.backend.trt") return script.add_loader(loader_str, result_var_name) return loader_name for i, (script_path, func_name) in enumerate(self.postprocess_scripts): - script.add_import(imports=["InvokeFromScript"], frm="polygraphy.backend.common") + script.add_import( + imports=["InvokeFromScript"], frm="polygraphy.backend.common" + ) pps = make_invocable("InvokeFromScript", script_path, name=func_name) loader_name = add_loader_if_nondefault( - "PostprocessNetwork", f"postprocess_step_{i}", func=pps, name=f"{script_path}:{func_name}" + "PostprocessNetwork", + f"postprocess_step_{i}", + func=pps, + name=f"{script_path}:{func_name}", ) loader_name = add_loader_if_nondefault( - "ModifyNetworkOutputs", "set_network_outputs", outputs=outputs, exclude_outputs=self.exclude_outputs + "ModifyNetworkOutputs", + "set_network_outputs", + outputs=outputs, + exclude_outputs=self.exclude_outputs, ) loader_name = add_loader_if_nondefault( - "SetLayerPrecisions", "set_layer_precisions", layer_precisions=self.layer_precisions + "SetLayerPrecisions", + "set_layer_precisions", + layer_precisions=self.layer_precisions, ) loader_name = add_loader_if_nondefault( - "SetTensorDatatypes", "set_tensor_datatypes", tensor_datatypes=self.tensor_datatypes + "SetTensorDatatypes", + "set_tensor_datatypes", + tensor_datatypes=self.tensor_datatypes, ) loader_name = add_loader_if_nondefault( "SetTensorFormats", "set_tensor_formats", tensor_formats=self.tensor_formats @@ -425,8 +498,15 @@ def __init__(self, output_opt: str = None, output_short_opt: str = None): def add_parser_args_impl(self): if self._output_opt: - params = ([self._output_short_opt] if self._output_short_opt else []) + 
[f"--{self._output_opt}"] - self.group.add_argument(*params, help="Path to save the TensorRT Engine", dest="save_engine", default=None) + params = ([self._output_short_opt] if self._output_short_opt else []) + [ + f"--{self._output_opt}" + ] + self.group.add_argument( + *params, + help="Path to save the TensorRT Engine", + dest="save_engine", + default=None, + ) def parse_impl(self, args): """ @@ -450,7 +530,10 @@ def add_to_script_impl(self, script, loader_name): return loader_name script.add_import(imports=["SaveBytes"], frm="polygraphy.backend.common") - return script.add_loader(make_invocable("SaveBytes", loader_name, path=self.path), "save_engine_bytes") + return script.add_loader( + make_invocable("SaveBytes", loader_name, path=self.path), + "save_engine_bytes", + ) def save_engine_bytes(self, engine_bytes, path=None): """ @@ -502,7 +585,10 @@ def add_to_script_impl(self, script, loader_name): return loader_name script.add_import(imports=["BytesFromEngine"], frm="polygraphy.backend.trt") - loader_name = script.add_loader(make_invocable("BytesFromEngine", loader_name, path=path), "bytes_from_engine") + loader_name = script.add_loader( + make_invocable("BytesFromEngine", loader_name, path=path), + "bytes_from_engine", + ) return self.arg_groups[TrtSaveEngineArgs].add_to_script(script, loader_name) def save_engine(self, engine, path=None): @@ -571,30 +657,43 @@ def add_to_script_impl(self, script, network_name=None): network_name (str): The name of a variable in the script pointing to a network loader. 
""" if self.arg_groups[ModelArgs].model_type == "engine": - script.add_import(imports=["BytesFromPath"], frm="polygraphy.backend.common") + script.add_import( + imports=["BytesFromPath"], frm="polygraphy.backend.common" + ) return script.add_loader( - make_invocable("BytesFromPath", self.arg_groups[ModelArgs].path), "load_engine_bytes" + make_invocable("BytesFromPath", self.arg_groups[ModelArgs].path), + "load_engine_bytes", ) network_loader_name = network_name if network_loader_name is None: - network_loader_name = self.arg_groups[TrtLoadNetworkArgs].add_to_script(script) + network_loader_name = self.arg_groups[TrtLoadNetworkArgs].add_to_script( + script + ) - script.add_import(imports=["EngineBytesFromNetwork"], frm="polygraphy.backend.trt") + script.add_import( + imports=["EngineBytesFromNetwork"], frm="polygraphy.backend.trt" + ) config_loader_name = self.arg_groups[TrtConfigArgs].add_to_script(script) - script.add_import(imports=["EngineBytesFromNetwork"], frm="polygraphy.backend.trt") + script.add_import( + imports=["EngineBytesFromNetwork"], frm="polygraphy.backend.trt" + ) loader_str = make_invocable( "EngineBytesFromNetwork", - self.arg_groups[TrtLoadPluginsArgs].add_to_script(script, network_loader_name), + self.arg_groups[TrtLoadPluginsArgs].add_to_script( + script, network_loader_name + ), config=config_loader_name, save_timing_cache=self.save_timing_cache, ) loader_name = script.add_loader(loader_str, "build_engine") if self._allow_saving: - loader_name = self.arg_groups[TrtSaveEngineBytesArgs].add_to_script(script, loader_name) + loader_name = self.arg_groups[TrtSaveEngineBytesArgs].add_to_script( + script, loader_name + ) return loader_name def load_engine_bytes(self, network=None): @@ -645,26 +744,34 @@ def parse_impl(self, args): Path rom which to load a runtime that can be used to load a version compatible engine that excludes the lean runtime. 
""" - self.load_runtime = args_util.parse_path(args_util.get(args, "load_runtime"), "Runtime") + self.load_runtime = args_util.parse_path( + args_util.get(args, "load_runtime"), "Runtime" + ) def add_to_script_impl(self, script, network_name=None): """ Args: network_name (str): The name of a variable in the script pointing to a network loader. """ - load_serialized_engine = self.arg_groups[TrtLoadEngineBytesArgs].add_to_script(script, network_name) + load_serialized_engine = self.arg_groups[TrtLoadEngineBytesArgs].add_to_script( + script, network_name + ) script.add_import(imports=["EngineFromBytes"], frm="polygraphy.backend.trt") runtime_loader = None if self.load_runtime is not None: script.add_import(imports=["LoadRuntime"], frm="polygraphy.backend.trt") - runtime_loader = script.add_loader(make_invocable("LoadRuntime", self.load_runtime), "load_runtime") + runtime_loader = script.add_loader( + make_invocable("LoadRuntime", self.load_runtime), "load_runtime" + ) return script.add_loader( make_invocable( "EngineFromBytes", - self.arg_groups[TrtLoadPluginsArgs].add_to_script(script, load_serialized_engine), + self.arg_groups[TrtLoadPluginsArgs].add_to_script( + script, load_serialized_engine + ), runtime=runtime_loader, ), "deserialize_engine", diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/trt/runner.py b/tools/Polygraphy/polygraphy/tools/args/backend/trt/runner.py index 79b3546b..29ef1129 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/trt/runner.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/trt/runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -61,7 +61,7 @@ def add_parser_args_impl(self): "0 to 100%%: The percentage of weights TRT will stream. 100%% will stream the maximum number of weights. " ">0B: The exact amount of streamable weights that reside on the GPU (unit suffixes are supported).", type=str, - default=None + default=None, ) def parse_impl(self, args): @@ -78,18 +78,31 @@ def parse_impl(self, args): self.allocation_strategy = args_util.get(args, "allocation_strategy") self.weight_streaming_budget = None self.weight_streaming_percent = None - + ws_arg = args_util.get(args, "weight_streaming_budget") if ws_arg and ws_arg.endswith("%"): percent = float(ws_arg[:-1]) - assert 0 <= percent <= 100, "Invalid percentage for --weight-streaming-budget!" + assert ( + 0 <= percent <= 100 + ), "Invalid percentage for --weight-streaming-budget!" self.weight_streaming_percent = percent elif ws_arg: budget = args_util.parse_num_bytes(ws_arg) - assert budget == -1 or budget >= 0, "Invalid amount for --weight-streaming-budget!" + assert ( + budget == -1 or budget >= 0 + ), "Invalid amount for --weight-streaming-budget!" 
self.weight_streaming_budget = budget def add_to_script_impl(self, script): script.add_import(imports=["TrtRunner"], frm="polygraphy.backend.trt") loader_name = self.arg_groups[TrtLoadEngineArgs].add_to_script(script) - script.add_runner(make_invocable("TrtRunner", loader_name, optimization_profile=self.optimization_profile, allocation_strategy=self.allocation_strategy, weight_streaming_budget=self.weight_streaming_budget, weight_streaming_percent=self.weight_streaming_percent)) + script.add_runner( + make_invocable( + "TrtRunner", + loader_name, + optimization_profile=self.optimization_profile, + allocation_strategy=self.allocation_strategy, + weight_streaming_budget=self.weight_streaming_budget, + weight_streaming_percent=self.weight_streaming_percent, + ) + ) diff --git a/tools/Polygraphy/polygraphy/tools/args/base.py b/tools/Polygraphy/polygraphy/tools/args/base.py index ca274f00..750ba256 100644 --- a/tools/Polygraphy/polygraphy/tools/args/base.py +++ b/tools/Polygraphy/polygraphy/tools/args/base.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -121,7 +121,9 @@ def add_parser_args(self, parser): num_prev_actions = len(parser._actions) - self.group = parser.add_argument_group(title.strip(), f"Options related to {desc.strip()}") + self.group = parser.add_argument_group( + title.strip(), f"Options related to {desc.strip()}" + ) self.add_parser_args_impl() num_added_actions = len(parser._actions) - num_prev_actions diff --git a/tools/Polygraphy/polygraphy/tools/args/comparator/comparator.py b/tools/Polygraphy/polygraphy/tools/args/comparator/comparator.py index d5528e70..7f05908e 100644 --- a/tools/Polygraphy/polygraphy/tools/args/comparator/comparator.py +++ b/tools/Polygraphy/polygraphy/tools/args/comparator/comparator.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,7 +21,10 @@ from polygraphy.tools.args import util as args_util from polygraphy.tools.args.backend.runner_select import RunnerSelectArgs from polygraphy.tools.args.base import BaseArgs -from polygraphy.tools.args.comparator.compare import CompareFuncIndicesArgs, CompareFuncSimpleArgs +from polygraphy.tools.args.comparator.compare import ( + CompareFuncIndicesArgs, + CompareFuncSimpleArgs, +) from polygraphy.tools.args.comparator.data_loader import DataLoaderArgs from polygraphy.tools.args.comparator.postprocess import ComparatorPostprocessArgs from polygraphy.tools.script import inline, make_invocable, safe @@ -62,7 +65,8 @@ def add_parser_args_impl(self): self.group.add_argument( "--save-outputs", "--save-results", - help="Path to save results from runners. 
" "The results (RunResults) will be encoded as JSON and saved", + help="Path to save results from runners. " + "The results (RunResults) will be encoded as JSON and saved", default=None, dest="save_outputs_path", ) @@ -95,13 +99,23 @@ def add_to_script_impl(self, script): use_subprocess=self.use_subprocess, save_inputs_path=self.save_inputs_path, ) - script.append_suffix(safe("\n# Runner Execution\n{results} = {:}", comparator_run, results=RESULTS_VAR_NAME)) + script.append_suffix( + safe( + "\n# Runner Execution\n{results} = {:}", + comparator_run, + results=RESULTS_VAR_NAME, + ) + ) if self.save_outputs_path: G_LOGGER.verbose(f"Will save runner results to: {self.save_outputs_path}") script.add_import(imports=["util"], frm="polygraphy") script.append_suffix( - safe("\n# Save results\n{results}.save({:})", self.save_outputs_path, results=RESULTS_VAR_NAME) + safe( + "\n# Save results\n{results}.save({:})", + self.save_outputs_path, + results=RESULTS_VAR_NAME, + ) ) return RESULTS_VAR_NAME @@ -136,9 +150,17 @@ def add_parser_args_impl(self): "indices": self.arg_groups[CompareFuncIndicesArgs], } - self.group.add_argument("--validate", help="Check outputs for NaNs and Infs", action="store_true", default=None) self.group.add_argument( - "--fail-fast", help="Fail fast (stop comparing after the first failure)", action="store_true", default=None + "--validate", + help="Check outputs for NaNs and Infs", + action="store_true", + default=None, + ) + self.group.add_argument( + "--fail-fast", + help="Fail fast (stop comparing after the first failure)", + action="store_true", + default=None, ) self.group.add_argument( @@ -199,8 +221,11 @@ def parse_impl(self, args): f"The selected comparison function is: '{self.compare_func}', so this option will be ignored." 
) - self.compare_func_script, self.compare_func_name = args_util.parse_script_and_func_name( - args_util.get(args, "compare_func_script"), default_func_name="compare_outputs" + self.compare_func_script, self.compare_func_name = ( + args_util.parse_script_and_func_name( + args_util.get(args, "compare_func_script"), + default_func_name="compare_outputs", + ) ) def add_to_script_impl(self, script, results_name): @@ -226,25 +251,47 @@ def add_to_script_impl(self, script, results_name): ) if self._allow_postprocessing: - results_name = self.arg_groups[ComparatorPostprocessArgs].add_to_script(script, results_name) + results_name = self.arg_groups[ComparatorPostprocessArgs].add_to_script( + script, results_name + ) SUCCESS_VAR_NAME = inline(safe("success")) script.append_suffix(safe("\n{success} = True", success=SUCCESS_VAR_NAME)) - if len(self.arg_groups[RunnerSelectArgs].runners) > 1 or self.load_outputs_paths: + if ( + len(self.arg_groups[RunnerSelectArgs].runners) > 1 + or self.load_outputs_paths + ): # Only do comparisons if there's actually something to compare. 
script.append_suffix(safe("# Accuracy Comparison")) if self.compare_func_script is not None: - script.add_import(imports=["InvokeFromScript"], frm="polygraphy.backend.common") - compare_func = make_invocable("InvokeFromScript", self.compare_func_script, name=self.compare_func_name) + script.add_import( + imports=["InvokeFromScript"], frm="polygraphy.backend.common" + ) + compare_func = make_invocable( + "InvokeFromScript", + self.compare_func_script, + name=self.compare_func_name, + ) else: - compare_func = self._comparison_func_map[self.compare_func].add_to_script(script) + compare_func = self._comparison_func_map[ + self.compare_func + ].add_to_script(script) compare_accuracy = make_invocable( - "Comparator.compare_accuracy", results_name, compare_func=compare_func, fail_fast=self.fail_fast + "Comparator.compare_accuracy", + results_name, + compare_func=compare_func, + fail_fast=self.fail_fast, + ) + script.append_suffix( + safe( + "{success} &= bool({:})\n", + compare_accuracy, + success=SUCCESS_VAR_NAME, + ) ) - script.append_suffix(safe("{success} &= bool({:})\n", compare_accuracy, success=SUCCESS_VAR_NAME)) if self.validate: script.append_suffix( safe( diff --git a/tools/Polygraphy/polygraphy/tools/args/comparator/compare.py b/tools/Polygraphy/polygraphy/tools/args/comparator/compare.py index 8816297b..07de2139 100644 --- a/tools/Polygraphy/polygraphy/tools/args/comparator/compare.py +++ b/tools/Polygraphy/polygraphy/tools/args/comparator/compare.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,7 +18,12 @@ from polygraphy.logger import G_LOGGER from polygraphy.tools.args import util as args_util from polygraphy.tools.args.base import BaseArgs -from polygraphy.tools.script import inline, make_invocable, make_invocable_if_nondefault, safe +from polygraphy.tools.script import ( + inline, + make_invocable, + make_invocable_if_nondefault, + safe, +) # # NOTE: The classes here are expected to use `None` as the default value for all arguments. @@ -140,13 +145,17 @@ def parse_impl(self, args): self.no_shape_check = args_util.get(args, "no_shape_check") self.rtol = args_util.parse_arglist_to_dict(args_util.get(args, "rtol")) self.atol = args_util.parse_arglist_to_dict(args_util.get(args, "atol")) - self.check_error_stat = args_util.parse_arglist_to_dict(args_util.get(args, "check_error_stat")) + self.check_error_stat = args_util.parse_arglist_to_dict( + args_util.get(args, "check_error_stat") + ) self.infinities_compare_equal = args_util.get(args, "infinities_compare_equal") self.save_heatmaps = args_util.get(args, "save_heatmaps") self.show_heatmaps = args_util.get(args, "show_heatmaps") self.save_error_metrics_plot = args_util.get(args, "save_error_metrics_plot") self.show_error_metrics_plot = args_util.get(args, "show_error_metrics_plot") - self.error_quantile = args_util.parse_arglist_to_dict(args_util.get(args, "error_quantile")) + self.error_quantile = args_util.parse_arglist_to_dict( + args_util.get(args, "error_quantile") + ) # Without this early check, failure would only happen after inference, which is clearly not desirable. 
if self.check_error_stat: @@ -172,7 +181,7 @@ def add_to_script_impl(self, script): show_heatmaps=self.show_heatmaps, save_error_metrics_plot=self.save_error_metrics_plot, show_error_metrics_plot=self.show_error_metrics_plot, - error_quantile=self.error_quantile + error_quantile=self.error_quantile, ) compare_func = None if compare_func_str: @@ -211,7 +220,9 @@ def parse_impl(self, args): Attributes: index_tolerance (Dict[str, int]): Per-tensor index tolerance. """ - self.index_tolerance = args_util.parse_arglist_to_dict(args_util.get(args, "index_tolerance")) + self.index_tolerance = args_util.parse_arglist_to_dict( + args_util.get(args, "index_tolerance") + ) def add_to_script_impl(self, script): from polygraphy.tools.args.comparator.comparator import ComparatorCompareArgs diff --git a/tools/Polygraphy/polygraphy/tools/args/comparator/data_loader.py b/tools/Polygraphy/polygraphy/tools/args/comparator/data_loader.py index 635860cb..63ea3558 100644 --- a/tools/Polygraphy/polygraphy/tools/args/comparator/data_loader.py +++ b/tools/Polygraphy/polygraphy/tools/args/comparator/data_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,7 +22,13 @@ from polygraphy.tools.args import util as args_util from polygraphy.tools.args.base import BaseArgs from polygraphy.tools.args.model import ModelArgs -from polygraphy.tools.script import Script, inline, make_invocable, make_invocable_if_nondefault, safe +from polygraphy.tools.script import ( + Script, + inline, + make_invocable, + make_invocable_if_nondefault, + safe, +) @mod.export() @@ -46,7 +52,13 @@ def __init__(self, allow_custom_input_shapes: bool = None): self._allow_custom_input_shapes = util.default(allow_custom_input_shapes, True) def add_parser_args_impl(self): - self.group.add_argument("--seed", metavar="SEED", help="Seed to use for random inputs", type=int, default=None) + self.group.add_argument( + "--seed", + metavar="SEED", + help="Seed to use for random inputs", + type=int, + default=None, + ) self.group.add_argument( "--val-range", help="Range of values to generate in the data loader. 
" @@ -147,8 +159,12 @@ def omit_none_tuple(tup): self.seed = args_util.get(args, "seed") - self._int_range = omit_none_tuple(tup=(args_util.get(args, "int_min"), args_util.get(args, "int_max"))) - self._float_range = omit_none_tuple(tup=(args_util.get(args, "float_min"), args_util.get(args, "float_max"))) + self._int_range = omit_none_tuple( + tup=(args_util.get(args, "int_min"), args_util.get(args, "int_max")) + ) + self._float_range = omit_none_tuple( + tup=(args_util.get(args, "float_min"), args_util.get(args, "float_max")) + ) if self._int_range or self._float_range: mod.warn_deprecated( "--int-min/--int-max and --float-min/--float-max", @@ -178,18 +194,35 @@ def omit_none_tuple(tup): self.load_inputs_paths = args_util.get(args, "load_inputs_paths") - self.data_loader_backend_module = args_util.get(args, "data_loader_backend_module") + self.data_loader_backend_module = args_util.get( + args, "data_loader_backend_module" + ) - self.data_loader_script, self.data_loader_func_name = args_util.parse_script_and_func_name( - args_util.get(args, "data_loader_script"), default_func_name="load_data" + self.data_loader_script, self.data_loader_func_name = ( + args_util.parse_script_and_func_name( + args_util.get(args, "data_loader_script"), default_func_name="load_data" + ) ) func_name = args_util.get(args, "data_loader_func_name") if func_name is not None: - mod.warn_deprecated("--data-loader-func-name", "--data-loader-script", "0.50.0", always_show_warning=True) + mod.warn_deprecated( + "--data-loader-func-name", + "--data-loader-script", + "0.50.0", + always_show_warning=True, + ) self.data_loader_func_name = func_name if self.load_inputs_paths or self.data_loader_script: - for arg in ["seed", "int_min", "int_max", "float_min", "float_max", "val_range", "iterations"]: + for arg in [ + "seed", + "int_min", + "int_max", + "float_min", + "float_max", + "val_range", + "iterations", + ]: val = args_util.get(args, arg) if val is not None: G_LOGGER.warning( @@ -204,7 +237,9 
@@ def _add_to_script_helper(self, script, user_input_metadata_str=None): if self.data_loader_script: script.add_import(imports=["mod"], frm="polygraphy") data_loader = make_invocable( - "mod.import_from_script", self.data_loader_script, name=self.data_loader_func_name + "mod.import_from_script", + self.data_loader_script, + name=self.data_loader_func_name, ) needs_invoke = True elif self.load_inputs_paths: @@ -258,7 +293,9 @@ def add_to_script_impl(self, script, user_input_metadata_str=None): str: The data loader, as a string. This may either be the variable name, or an invocation of the data loader function. """ - data_loader, needs_invoke = self._add_to_script_helper(script, user_input_metadata_str) + data_loader, needs_invoke = self._add_to_script_helper( + script, user_input_metadata_str + ) if needs_invoke: data_loader = make_invocable(data_loader) return data_loader @@ -281,7 +318,10 @@ def add_to_script_wrapper(script, *args, **kwargs): name, needs_invoke = self._add_to_script_helper(script, *args, **kwargs) return name - data_loader = util.default(args_util.run_script(add_to_script_wrapper, user_input_metadata), DataLoader()) + data_loader = util.default( + args_util.run_script(add_to_script_wrapper, user_input_metadata), + DataLoader(), + ) if needs_invoke: data_loader = data_loader() return data_loader diff --git a/tools/Polygraphy/polygraphy/tools/args/comparator/postprocess.py b/tools/Polygraphy/polygraphy/tools/args/comparator/postprocess.py index c12c2a91..3b29b4c2 100644 --- a/tools/Polygraphy/polygraphy/tools/args/comparator/postprocess.py +++ b/tools/Polygraphy/polygraphy/tools/args/comparator/postprocess.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -54,7 +54,9 @@ def parse_impl(self, args): {"top_k": {"output1": 5, "output2": 6}} """ - self.postprocess = args_util.parse_arglist_to_dict(args_util.get(args, "postprocess")) + self.postprocess = args_util.parse_arglist_to_dict( + args_util.get(args, "postprocess") + ) postprocess = {} topk_key = inline(safe("top_k")) @@ -62,7 +64,9 @@ def parse_impl(self, args): postprocess[topk_key] = {} for key, val in self.postprocess.items(): if not val.startswith("top-"): - G_LOGGER.critical(f"Invalid post-processing function: {val}. Note: Valid choices are: ['top-'].") + G_LOGGER.critical( + f"Invalid post-processing function: {val}. Note: Valid choices are: ['top-']." + ) k, _, axis = val.partition(",") k = int(k.lstrip("top-")) if axis: diff --git a/tools/Polygraphy/polygraphy/tools/args/logger/logger.py b/tools/Polygraphy/polygraphy/tools/args/logger/logger.py index ea8b412f..6f9a20b7 100644 --- a/tools/Polygraphy/polygraphy/tools/args/logger/logger.py +++ b/tools/Polygraphy/polygraphy/tools/args/logger/logger.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -64,7 +64,9 @@ def add_parser_args_impl(self): default=None, ) - self.group.add_argument("--silent", help="Disable all output", action="store_true", default=None) + self.group.add_argument( + "--silent", help="Disable all output", action="store_true", default=None + ) self.group.add_argument( "--log-format", help="Format for log messages: {{'timestamp': Include timestamp, 'line-info': Include file and line number, " @@ -123,7 +125,9 @@ def parse_impl(self, args): if verbosity is not None: self.verbosity = {} for path, sev in verbosity.items(): - self.verbosity[path] = inline(safe("G_LOGGER.{:}", inline_identifier(sev.upper()))) + self.verbosity[path] = inline( + safe("G_LOGGER.{:}", inline_identifier(sev.upper())) + ) # Enable logger settings immediately on parsing. self.get_logger() @@ -138,7 +142,9 @@ def add_to_script_impl(self, script): logger_settings.append("G_LOGGER.module_severity = G_LOGGER.CRITICAL") elif self.verbosity is not None: # Need to escape braces of the dictionary so it's not treated as a format-string by `safe()`. - logger_settings.append(f"G_LOGGER.module_severity = {'{' + repr(self.verbosity) + '}'}") + logger_settings.append( + f"G_LOGGER.module_severity = {'{' + repr(self.verbosity) + '}'}" + ) for fmt in self.log_format: if fmt == "no-colors": diff --git a/tools/Polygraphy/polygraphy/tools/args/model.py b/tools/Polygraphy/polygraphy/tools/args/model.py index da2d2be8..99e7bcc6 100644 --- a/tools/Polygraphy/polygraphy/tools/args/model.py +++ b/tools/Polygraphy/polygraphy/tools/args/model.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -106,10 +106,16 @@ def __init__( "Model input(s) and their shape(s). " "Used to determine shapes to use while generating input data for inference", ) - self._guess_model_type_from_runners = util.default(guess_model_type_from_runners, False) + self._guess_model_type_from_runners = util.default( + guess_model_type_from_runners, False + ) def add_parser_args_impl(self): - self.group.add_argument("model_file", help="Path to the model", nargs=None if self._model_opt_required else "?") + self.group.add_argument( + "model_file", + help="Path to the model", + nargs=None if self._model_opt_required else "?", + ) if self._required_model_type is None: self.group.add_argument( @@ -189,16 +195,26 @@ def use_ext(ext_mapping): self.input_shapes = TensorMetadata() if args_util.get(args, "input_shapes"): - self.input_shapes = args_util.parse_meta(args_util.get(args, "input_shapes"), includes_dtype=False) + self.input_shapes = args_util.parse_meta( + args_util.get(args, "input_shapes"), includes_dtype=False + ) self.path = None self.extra_model_info = None - self.path, self.extra_model_info = args_util.parse_script_and_func_name(args_util.get(args, "model_file")) + self.path, self.extra_model_info = args_util.parse_script_and_func_name( + args_util.get(args, "model_file") + ) self.path = args_util.parse_path(self.path, "Model") - model_type_str = self._required_model_type if self._required_model_type else determine_model_type(self.path) - self.model_type = ModelArgs.ModelType(model_type_str) if model_type_str else None + model_type_str = ( + self._required_model_type + if self._required_model_type + else determine_model_type(self.path) + ) + self.model_type = ( + ModelArgs.ModelType(model_type_str) if model_type_str else None + ) # Set up extra_model_info defaults for each model type if self.model_type == "trt-network-script": diff --git 
a/tools/Polygraphy/polygraphy/tools/args/util/util.py b/tools/Polygraphy/polygraphy/tools/args/util/util.py index 97fda30c..c0bc81ec 100644 --- a/tools/Polygraphy/polygraphy/tools/args/util/util.py +++ b/tools/Polygraphy/polygraphy/tools/args/util/util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -149,7 +149,12 @@ def datatype_from_str(dt_str): @mod.export() def parse_arglist_to_tuple_list( - arg_lst, cast_to=None, sep=None, allow_empty_key=None, treat_missing_sep_as_val=None, treat_unspecified_as_none=None + arg_lst, + cast_to=None, + sep=None, + allow_empty_key=None, + treat_missing_sep_as_val=None, + treat_unspecified_as_none=None, ): """ Generate a list of (key, value) pairs from a list of arguments of the form: @@ -226,7 +231,12 @@ def parse_arglist_to_tuple_list( @mod.export() def parse_arg_to_tuple( - arg, cast_to=None, sep=None, allow_empty_key=None, treat_missing_sep_as_val=None, treat_unspecified_as_none=None + arg, + cast_to=None, + sep=None, + allow_empty_key=None, + treat_missing_sep_as_val=None, + treat_unspecified_as_none=None, ): """ Similar to `parse_arglist_to_tuple_list` but operates on a single argument and returns a single tuple @@ -244,7 +254,12 @@ def parse_arg_to_tuple( return None tuple_list = parse_arglist_to_tuple_list( - [arg], cast_to, sep, allow_empty_key, treat_missing_sep_as_val, treat_unspecified_as_none + [arg], + cast_to, + sep, + allow_empty_key, + treat_missing_sep_as_val, + treat_unspecified_as_none, ) if tuple_list is None: return None @@ -259,7 +274,12 @@ def parse_arg_to_tuple( @mod.export() def parse_arglist_to_dict( - arg_lst, cast_to=None, sep=None, allow_empty_key=None, treat_missing_sep_as_val=None, treat_unspecified_as_none=None 
+ arg_lst, + cast_to=None, + sep=None, + allow_empty_key=None, + treat_missing_sep_as_val=None, + treat_unspecified_as_none=None, ): """ Similar to `parse_arglist_to_tuple_list` but returns a dictionary instead of a list of tuples. @@ -270,7 +290,12 @@ def parse_arglist_to_dict( was not specified). """ tuple_list = parse_arglist_to_tuple_list( - arg_lst, cast_to, sep, allow_empty_key, treat_missing_sep_as_val, treat_unspecified_as_none + arg_lst, + cast_to, + sep, + allow_empty_key, + treat_missing_sep_as_val, + treat_unspecified_as_none, ) if tuple_list is None: return None @@ -284,7 +309,9 @@ def parse_script_and_func_name(arg, default_func_name=None): # On Windows we need to split the drive letter (e.g. 'C:') so it's not confused with the script/function separator. drive_letter, arg = os.path.splitdrive(arg) - script_and_func_name = parse_arg_to_tuple(arg, treat_missing_sep_as_val=False, treat_unspecified_as_none=True) + script_and_func_name = parse_arg_to_tuple( + arg, treat_missing_sep_as_val=False, treat_unspecified_as_none=True + ) if script_and_func_name is not None: script, func_name = script_and_func_name func_name = util.default(func_name, default_func_name) diff --git a/tools/Polygraphy/polygraphy/tools/base/tool.py b/tools/Polygraphy/polygraphy/tools/base/tool.py index 70e100c1..893409f0 100644 --- a/tools/Polygraphy/polygraphy/tools/base/tool.py +++ b/tools/Polygraphy/polygraphy/tools/base/tool.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -33,7 +33,9 @@ class Tool: def __init__(self, name=None): self.name = name - self.arg_groups = ArgGroups() # Populated by setup_parser based on get_subscriptions() + self.arg_groups = ( + ArgGroups() + ) # Populated by setup_parser based on get_subscriptions() def setup_parser(self, subparsers=None): """ @@ -57,7 +59,9 @@ def setup_parser(self, subparsers=None): m_type = type(arg_group) self.arg_groups[m_type] = arg_group - allow_abbrev = all(arg_group.allows_abbreviation() for arg_group in self.arg_groups.values()) + allow_abbrev = all( + arg_group.allows_abbreviation() for arg_group in self.arg_groups.values() + ) description = dedent(self.__doc__) if subparsers is not None: @@ -78,7 +82,9 @@ def setup_parser(self, subparsers=None): ) parser.set_defaults(subcommand=self) else: - parser = argparse.ArgumentParser(add_help=True, description=description, allow_abbrev=allow_abbrev) + parser = argparse.ArgumentParser( + add_help=True, description=description, allow_abbrev=allow_abbrev + ) for arg_group in self.arg_groups.values(): arg_group.register(self.arg_groups) @@ -89,7 +95,9 @@ def setup_parser(self, subparsers=None): try: self.add_parser_args(parser) except Exception as err: - G_LOGGER.internal_error(f"Could not register tool argument parser for: {self.name}\nNote: Error was: {err}") + G_LOGGER.internal_error( + f"Could not register tool argument parser for: {self.name}\nNote: Error was: {err}" + ) return parser # Implementation for `get_subscriptions`. 
This should be implemented by child classes instead of `get_subscriptions` diff --git a/tools/Polygraphy/polygraphy/tools/check/check.py b/tools/Polygraphy/polygraphy/tools/check/check.py index 7d7d8dfd..8667f341 100644 --- a/tools/Polygraphy/polygraphy/tools/check/check.py +++ b/tools/Polygraphy/polygraphy/tools/check/check.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/tools/check/subtool/lint.py b/tools/Polygraphy/polygraphy/tools/check/subtool/lint.py index 0b45c500..300cd5de 100644 --- a/tools/Polygraphy/polygraphy/tools/check/subtool/lint.py +++ b/tools/Polygraphy/polygraphy/tools/check/subtool/lint.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -33,7 +33,12 @@ from polygraphy.json import save_json from polygraphy.logger import G_LOGGER from polygraphy.tools import util as tools_util -from polygraphy.tools.args import DataLoaderArgs, ModelArgs, OnnxLoadArgs, OnnxrtSessionArgs +from polygraphy.tools.args import ( + DataLoaderArgs, + ModelArgs, + OnnxLoadArgs, + OnnxrtSessionArgs, +) from polygraphy.tools.base import Tool onnx = mod.lazy_import("onnx") @@ -83,7 +88,10 @@ class Lint(Tool): 6. Large models (>2GB) require external data to be in same directory as the model file, custom paths to external data are not supported. 
""" - CUSTOM_OP_EXCEPTION_SUBSTRS = ["No opset import for domain", "is not a registered function/op"] + CUSTOM_OP_EXCEPTION_SUBSTRS = [ + "No opset import for domain", + "is not a registered function/op", + ] ONNX_CHECKER_IGNORE_SUBSTR = "Bad node spec for node" INVALID_ONNX_EXCEPTION_SUBSTR = "Error parsing message with type 'onnx.ModelProto'" MAXIMUM_PROTOBUF = 2e9 # 2GB @@ -158,9 +166,13 @@ def make_singleton_graph(self) -> Optional["gs.Graph"]: """ node = self.cur_node - inp_names = {inp.name for inp in node.inputs if isinstance(inp, gs.Variable)} + inp_names = { + inp.name for inp in node.inputs if isinstance(inp, gs.Variable) + } - if not all([inp in self.cache for inp in inp_names]): # Need all inputs to be available in the cache + if not all( + [inp in self.cache for inp in inp_names] + ): # Need all inputs to be available in the cache return None singleton = self.graph.copy() @@ -198,7 +210,9 @@ def update(self, output_dict: Optional[dict]): # means that something went wrong with its ancestor nodes. continue self.num_consumers[inp.name] -= 1 - if self.num_consumers[inp.name] == 0: # All consuming nodes of this tensor have been visited + if ( + self.num_consumers[inp.name] == 0 + ): # All consuming nodes of this tensor have been visited G_LOGGER.super_verbose(f"removing tensor: `{inp.name}` from cache") del self.cache[inp.name] # Can delete the tensor from the cache @@ -214,9 +228,13 @@ def update(self, output_dict: Optional[dict]): elif isinstance( out, gs.Constant ): # This theoretically should never happen, as constants are not outputs of nodes - G_LOGGER.critical(f"tensor: `{out.name}` is a constant, but is part of the output!") + G_LOGGER.critical( + f"tensor: `{out.name}` is a constant, but is part of the output!" 
+ ) else: - G_LOGGER.critical(f"tensor: `{out.name}` is neither a variable nor a constant") + G_LOGGER.critical( + f"tensor: `{out.name}` is neither a variable nor a constant" + ) def set_graph_inputs(self, feed_dict: dict): """ @@ -237,7 +255,9 @@ def feed_dict(self) -> dict: f"tensor: {inp.name} missing in input cache! are you sure current node {self.cur_node.name} is valid?" ) # This should never happen elif isinstance(inp, gs.Constant): - G_LOGGER.super_verbose(f"tensor: `{inp.name}` is a constant, not tracked in cache. ") + G_LOGGER.super_verbose( + f"tensor: `{inp.name}` is a constant, not tracked in cache. " + ) continue _feed_dict[inp.name] = self.cache[inp.name] @@ -339,7 +359,9 @@ def add( scope = "" if node_name and op: scope = f"Name: {node_name}, Op: {op} | " - G_LOGGER.log(f"LINT | {scope}{message}", severity=severity_from_level[level]) + G_LOGGER.log( + f"LINT | {scope}{message}", severity=severity_from_level[level] + ) lint_entry = { "level": level.value, "source": source.value, @@ -359,7 +381,9 @@ def add( self.lint_entries.append(lint_entry) - self.is_model_valid = (level != Lint.Level.EXCEPTION) and self.is_model_valid + self.is_model_valid = ( + level != Lint.Level.EXCEPTION + ) and self.is_model_valid elif node_name not in self.summary["failing"]: self.summary["passing"].update([node_name]) @@ -401,7 +425,11 @@ def _prune_ONNXRuntimeError_formatting(message): # The ORT message format is not as expected, so just return the message pruning the prefix return message.split("[ONNXRuntimeError] : ")[1] message = "".join(parts[3:]).replace('"', "`") - for substr in ORT_SUBSTRS_TO_PRUNE: # remove substrings that are not useful in the error message + for ( + substr + ) in ( + ORT_SUBSTRS_TO_PRUNE + ): # remove substrings that are not useful in the error message message = message.replace(substr, "") return message @@ -411,6 +439,9 @@ def _prune_ONNXRuntimeError_formatting(message): "SystemError: " + x.split(" : ")[1] ), # If starts with "SystemError", it 
is likely due to improper installation of ONNX Runtime. r"\[ONNXRuntimeError\] : .*": _prune_ONNXRuntimeError_formatting, # [ONNXRuntimeError] : {code} : {StatusCodeToString(code)} : {msg} + r"\x1b(?:\[(?:\d+;){0,2}\d+m)(.*)\x1b\[m": lambda msg: re.sub( + r"\x1b(?:\[(?:\d+;){0,2}\d+m)(.*)\x1b\[m", "\\1", msg + ), # Remove log coloration characters from https://github.com/microsoft/onnxruntime/blob/b33216be4c02adfbbdeac2fd30ddc55f673eda3d/onnxruntime/core/common/logging/sinks/ostream_sink.cc#L24 r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ \[.*?\]\ ": lambda msg: re.sub( r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ \[.*?\]\ ", "", msg ), # (e.g: https://github.com/microsoft/onnxruntime/blob/24566058b3e5bb9e511513977cee6e7c553fd5c2/onnxruntime/core/graph/graph.cc#L3545-L3546) @@ -518,7 +549,9 @@ def _generate_unique_name(node_id): while name in names: # guarantee unique name name = f"polygraphy_unnamed_node_{node_id}_{uid}" uid += 1 - G_LOGGER.verbose(f"Node with topological id: {node_id} has empty name. Renaming to: {name}") + G_LOGGER.verbose( + f"Node with topological id: {node_id} has empty name. 
Renaming to: {name}" + ) return name for node in graph.nodes: @@ -648,21 +681,27 @@ def wrapper(*args, **kwargs): captured_stderr = io.BytesIO() captured_exception = None result = None - with contextlib.redirect_stdout(captured_stdout), stderr_redirector(captured_stderr): + with contextlib.redirect_stdout(captured_stdout), stderr_redirector( + captured_stderr + ): try: # Execute the function result = func(*args, **kwargs) except Exception as err: # pylint: disable=broad-except captured_exception = err UTF_TYPE = "utf-16-le" if os.name == "nt" else "utf-8" - stderr_msg = captured_stderr.getvalue().decode(UTF_TYPE) # platform-dependent + stderr_msg = captured_stderr.getvalue().decode( + UTF_TYPE + ) # platform-dependent stdout_msg = captured_stdout.getvalue() return (result, captured_exception, stderr_msg, stdout_msg) return wrapper @capture - def _ort_inference_check(model_bytes: Union[bytes, str], feed_dict: OrderedDict) -> Optional[OrderedDict]: + def _ort_inference_check( + model_bytes: Union[bytes, str], feed_dict: OrderedDict + ) -> Optional[OrderedDict]: """ Runs inference using ONNX-Runtime. @@ -704,7 +743,10 @@ def _unused_info_helper(graph: "gs.Graph"): cleaned_input_tensor_names = {inp.name for inp in graph.inputs} cleaned_node_info = {(node.name, node.op) for node in graph.nodes} - return (orig_node_info - cleaned_node_info, orig_input_tensor_names - cleaned_input_tensor_names) + return ( + orig_node_info - cleaned_node_info, + orig_input_tensor_names - cleaned_input_tensor_names, + ) def _report_unused_info(graph: "gs.Graph"): """ @@ -721,12 +763,18 @@ def _report_unused_info(graph: "gs.Graph"): - All nodes in the graph are expected to have non-empty names. """ - (unused_node_info, unused_input_tensor_names), exception, _, _ = _unused_info_helper(graph) + (unused_node_info, unused_input_tensor_names), exception, _, _ = ( + _unused_info_helper(graph) + ) if exception: # something went wrong here. - G_LOGGER.internal_error(f"Failed to report unused nodes. 
Error: {exception}") - G_LOGGER.warning(f"Failed to report unused nodes. Error: {exception}. Continuing...") + G_LOGGER.internal_error( + f"Failed to report unused nodes. Error: {exception}" + ) + G_LOGGER.warning( + f"Failed to report unused nodes. Error: {exception}. Continuing..." + ) # report unused tensors that are also inputs (intermediate tensors are not reported) for inp_name in sorted(list(unused_input_tensor_names)): @@ -804,7 +852,7 @@ def _report_unused_info(graph: "gs.Graph"): # NOTE: This is only done if early-exiting, as otherwise these warnings tend to be repeats # of node level warnings/exceptions. if warn_str: - warnings = warn_str.split('\n') + warnings = warn_str.split("\n") for warning in warnings: if len(warning) > 0: self.report.add( @@ -818,7 +866,9 @@ def _report_unused_info(graph: "gs.Graph"): self.report.summary["passing"] = {node.name for node in graph.nodes} self.report.export(args.output) - G_LOGGER.verbose("ORT inference check passed. Model is valid. Early exiting.") + G_LOGGER.verbose( + "ORT inference check passed. Model is valid. Early exiting." + ) return 0 if isinstance(exception, PolygraphyException): # PolygraphyException is raised when the provided input is not compatible with polygraphy @@ -830,18 +880,29 @@ def _report_unused_info(graph: "gs.Graph"): # start Node-level linting with Lint.ContextManager(graph) as lcm: - lcm.set_graph_inputs(feed_dict) # load the cache with initial feed_dict values for iterative inference. + lcm.set_graph_inputs( + feed_dict + ) # load the cache with initial feed_dict values for iterative inference. for _ in lcm.nodes(): g = lcm.make_singleton_graph() inference_output = None if g: # has valid ancestors. Can perform inference. 
- model_bytes = onnx_backend.BytesFromOnnx(gs.export_onnx(g, do_type_check=False)) - inference_output, exception, _, _ = _ort_inference_check(model_bytes, lcm.feed_dict()) + model_bytes = onnx_backend.BytesFromOnnx( + gs.export_onnx(g, do_type_check=False) + ) + inference_output, exception, _, _ = _ort_inference_check( + model_bytes, lcm.feed_dict() + ) # NOTE: we ignore stdout and stderr as it contains info from polygraphy not relevant to linting. err_str = str(exception) if exception else "" - if any([substr in err_str for substr in Lint.CUSTOM_OP_EXCEPTION_SUBSTRS]): + if any( + [ + substr in err_str + for substr in Lint.CUSTOM_OP_EXCEPTION_SUBSTRS + ] + ): self.report.add( level=Lint.Level.WARNING, source=Lint.Source.ONNXRUNTIME, diff --git a/tools/Polygraphy/polygraphy/tools/convert/convert.py b/tools/Polygraphy/polygraphy/tools/convert/convert.py index 4d3d3531..219fcc8a 100644 --- a/tools/Polygraphy/polygraphy/tools/convert/convert.py +++ b/tools/Polygraphy/polygraphy/tools/convert/convert.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -65,7 +65,9 @@ def get_subscriptions_impl(self): ] def add_parser_args_impl(self, parser): - parser.add_argument("-o", "--output", help="Path to save the converted model", required=True) + parser.add_argument( + "-o", "--output", help="Path to save the converted model", required=True + ) parser.add_argument( "--convert-to", help="The format to attempt to convert the model to." 
@@ -86,16 +88,24 @@ def run_impl(self, args): convert_type = "onnx-like-trt-network" else: CONVERT_TO_MODEL_TYPE_MAPPING = {"onnx": "onnx", "trt": "engine"} - convert_type = ModelArgs.ModelType(CONVERT_TO_MODEL_TYPE_MAPPING[args.convert_to]) + convert_type = ModelArgs.ModelType( + CONVERT_TO_MODEL_TYPE_MAPPING[args.convert_to] + ) if convert_type == "onnx-like-trt-network": - onnx_like = trt_backend.onnx_like_from_network(self.arg_groups[TrtLoadNetworkArgs].load_network()) + onnx_like = trt_backend.onnx_like_from_network( + self.arg_groups[TrtLoadNetworkArgs].load_network() + ) onnx_backend.save_onnx(onnx_like, args.output) elif convert_type.is_onnx(): model = self.arg_groups[OnnxLoadArgs].load_onnx() self.arg_groups[OnnxSaveArgs].save_onnx(model, args.output) elif convert_type.is_trt(): - with self.arg_groups[TrtLoadEngineBytesArgs].load_engine_bytes() as serialized_engine: - self.arg_groups[TrtSaveEngineBytesArgs].save_engine_bytes(serialized_engine, args.output) + with self.arg_groups[ + TrtLoadEngineBytesArgs + ].load_engine_bytes() as serialized_engine: + self.arg_groups[TrtSaveEngineBytesArgs].save_engine_bytes( + serialized_engine, args.output + ) else: G_LOGGER.critical(f"Cannot convert to model type: {convert_type}") diff --git a/tools/Polygraphy/polygraphy/tools/data/data.py b/tools/Polygraphy/polygraphy/tools/data/data.py index 52374de2..d26e9d18 100644 --- a/tools/Polygraphy/polygraphy/tools/data/data.py +++ b/tools/Polygraphy/polygraphy/tools/data/data.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/tools/data/subtool/to_input.py b/tools/Polygraphy/polygraphy/tools/data/subtool/to_input.py index f19897c1..e2f92dde 100644 --- a/tools/Polygraphy/polygraphy/tools/data/subtool/to_input.py +++ b/tools/Polygraphy/polygraphy/tools/data/subtool/to_input.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,7 +40,9 @@ def add_parser_args(self, parser): "Otherwise, the outputs from one runner may be overwritten by those of a subsequent runner. ", nargs="+", ) - parser.add_argument("-o", "--output", help="Path to the file to generate", required=True) + parser.add_argument( + "-o", "--output", help="Path to the file to generate", required=True + ) def run_impl(self, args): inputs = [] @@ -73,4 +75,8 @@ def update_inputs(new_inputs, path): data = [data] update_inputs(data, path) - save_json(inputs, args.output, description=f"input file containing {len(inputs)} iteration(s)") + save_json( + inputs, + args.output, + description=f"input file containing {len(inputs)} iteration(s)", + ) diff --git a/tools/Polygraphy/polygraphy/tools/debug/debug.py b/tools/Polygraphy/polygraphy/tools/debug/debug.py index 16ddc187..313e0563 100644 --- a/tools/Polygraphy/polygraphy/tools/debug/debug.py +++ b/tools/Polygraphy/polygraphy/tools/debug/debug.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/tools/debug/subtool/base.py b/tools/Polygraphy/polygraphy/tools/debug/subtool/base.py index b94d2363..5982360f 100644 --- a/tools/Polygraphy/polygraphy/tools/debug/subtool/base.py +++ b/tools/Polygraphy/polygraphy/tools/debug/subtool/base.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,7 +31,11 @@ TrtSaveEngineBytesArgs, ) from polygraphy.tools.base import Tool -from polygraphy.tools.debug.subtool.iterative_debug_args import ArtifactSortArgs, CheckCmdArgs, IterativeDebugArgs +from polygraphy.tools.debug.subtool.iterative_debug_args import ( + ArtifactSortArgs, + CheckCmdArgs, + IterativeDebugArgs, +) trt_backend = mod.lazy_import("polygraphy.backend.trt") trt = mod.lazy_import("tensorrt>=8.5") @@ -55,7 +59,9 @@ def __init__( def get_subscriptions_impl(self): return [ CheckCmdArgs(), - ArtifactSortArgs(allow_no_artifacts_warning=self._allow_no_artifacts_warning), + ArtifactSortArgs( + allow_no_artifacts_warning=self._allow_no_artifacts_warning + ), IterativeDebugArgs( iter_art_opt_default="polygraphy_debug.engine", allow_until_opt=self._allow_until_opt, @@ -65,7 +71,9 @@ def get_subscriptions_impl(self): OnnxInferShapesArgs(), OnnxLoadArgs(outputs_opt_prefix=False), DataLoaderArgs(), # For int8 calibration - TrtConfigArgs(precision_constraints_default=self._precision_constraints_default), + TrtConfigArgs( + precision_constraints_default=self._precision_constraints_default + ), TrtLoadPluginsArgs(), TrtOnnxFlagArgs(), TrtLoadNetworkArgs(), @@ -110,7 +118,9 @@ def remaining(self): pass def run_impl(self, args): - builder, network, _ 
= util.unpack_args(self.arg_groups[TrtLoadNetworkArgs].load_network(), 3) + builder, network, _ = util.unpack_args( + self.arg_groups[TrtLoadNetworkArgs].load_network(), 3 + ) self.setup(args, network) @@ -118,18 +128,23 @@ def make_iter_art(_): self.process_network(network) try: - serialized_engine = self.arg_groups[TrtLoadEngineBytesArgs].load_engine_bytes((builder, network)) + serialized_engine = self.arg_groups[ + TrtLoadEngineBytesArgs + ].load_engine_bytes((builder, network)) except Exception as err: G_LOGGER.warning( f"Failed to create network or engine, continuing to the next iteration.\nNote: Error was: {err}" ) - G_LOGGER.internal_error("Failed to create network or engine. See warning above for details.") + G_LOGGER.internal_error( + "Failed to create network or engine. See warning above for details." + ) self.arg_groups[IterativeDebugArgs].skip_iteration(success=False) else: # Don't need to keep the engine around in memory - just serialize to disk and free it. with serialized_engine: self.arg_groups[TrtSaveEngineBytesArgs].save_engine_bytes( - serialized_engine, self.arg_groups[IterativeDebugArgs].iter_artifact_path + serialized_engine, + self.arg_groups[IterativeDebugArgs].iter_artifact_path, ) def advance(context): diff --git a/tools/Polygraphy/polygraphy/tools/debug/subtool/build.py b/tools/Polygraphy/polygraphy/tools/debug/subtool/build.py index 3854b35a..579e1559 100644 --- a/tools/Polygraphy/polygraphy/tools/debug/subtool/build.py +++ b/tools/Polygraphy/polygraphy/tools/debug/subtool/build.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/tools/debug/subtool/iterative_debug_args.py b/tools/Polygraphy/polygraphy/tools/debug/subtool/iterative_debug_args.py index 664c4837..773551f1 100644 --- a/tools/Polygraphy/polygraphy/tools/debug/subtool/iterative_debug_args.py +++ b/tools/Polygraphy/polygraphy/tools/debug/subtool/iterative_debug_args.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -68,7 +68,9 @@ def encode(iter_context): @Decoder.register(IterationContext) def decode(dct): - return IterationContext(state=dct["state"], iteration_info=dct["iteration_info"], success=dct["success"]) + return IterationContext( + state=dct["state"], iteration_info=dct["iteration_info"], success=dct["success"] + ) class CheckCmdArgs(BaseArgs): @@ -157,7 +159,9 @@ def parse_impl(self, args): self.check = args_util.get(args, "check") if self.check is None: - G_LOGGER.start("Starting interactive debugging session since no `--check` command was provided") + G_LOGGER.start( + "Starting interactive debugging session since no `--check` command was provided" + ) self.fail_codes = args_util.get(args, "fail_codes") self.ignore_fail_codes = args_util.get(args, "ignore_fail_codes") @@ -194,9 +198,7 @@ def prompt_user(msg): return None return res[0] - prompt = ( - f"Did '{iter_artifact_path if iter_artifact_path is not None else 'this iteration'}' [p]ass or [f]ail?" - ) + prompt = f"Did '{iter_artifact_path if iter_artifact_path is not None else 'this iteration'}' [p]ass or [f]ail?" 
response = prompt_user(prompt) while response not in ["p", "f"]: response = prompt_user("Please choose either: 'p' or 'f':") @@ -209,7 +211,9 @@ def is_status_success(status): has_fail_regex = None if self.fail_regexes is not None: output = status.stdout.decode() + status.stderr.decode() - has_fail_regex = any(regex.search(output) is not None for regex in self.fail_regexes) + has_fail_regex = any( + regex.search(output) is not None for regex in self.fail_regexes + ) if self.fail_codes is not None: # If a fail-code is specified, then we should also check has_fail_regex if provided. @@ -219,7 +223,9 @@ def is_status_success(status): else: # If a fail-code is not specified, we should trigger failures even on 0-status # if the fail regex is found. - failed = status.returncode != 0 if has_fail_regex is None else has_fail_regex + failed = ( + status.returncode != 0 if has_fail_regex is None else has_fail_regex + ) return not failed G_LOGGER.info(f"Running check command: {' '.join(self.check)}") @@ -228,7 +234,9 @@ def is_status_success(status): if self.show_output or (not success and not self.hide_fail_output): stderr_log_level = G_LOGGER.WARNING if success else G_LOGGER.ERROR - G_LOGGER.info(f"========== CAPTURED STDOUT ==========\n{status.stdout.decode()}") + G_LOGGER.info( + f"========== CAPTURED STDOUT ==========\n{status.stdout.decode()}" + ) G_LOGGER.log( f"========== CAPTURED STDERR ==========\n{status.stderr.decode()}", severity=stderr_log_level, @@ -249,7 +257,9 @@ def __init__(self, allow_no_artifacts_warning=None): Defaults to True. 
""" super().__init__() - self._allow_no_artifacts_warning = util.default(allow_no_artifacts_warning, True) + self._allow_no_artifacts_warning = util.default( + allow_no_artifacts_warning, True + ) def add_parser_args_impl(self): self.group.add_argument( @@ -350,7 +360,11 @@ class IterativeDebugArgs(BaseArgs): """ def __init__( - self, allow_iter_art_opt=None, iter_art_opt_default=None, allow_until_opt=None, allow_debug_replay=None + self, + allow_iter_art_opt=None, + iter_art_opt_default=None, + allow_until_opt=None, + allow_debug_replay=None, ): """ Args: @@ -372,7 +386,9 @@ def __init__( self._iter_art_opt_default = iter_art_opt_default if allow_iter_art_opt and not iter_art_opt_default: - G_LOGGER.internal_error("Must provide iter_art_opt_default if intermediate artifact is enabled") + G_LOGGER.internal_error( + "Must provide iter_art_opt_default if intermediate artifact is enabled" + ) self._allow_until_opt = util.default(allow_until_opt, False) self._allow_debug_replay = util.default(allow_debug_replay, True) @@ -481,7 +497,9 @@ def parse_impl(self, args): except: until = until if until not in ["good", "bad"]: - G_LOGGER.critical(f"--until value must be an integer, 'good', or 'bad', but was: {until}") + G_LOGGER.critical( + f"--until value must be an integer, 'good', or 'bad', but was: {until}" + ) self.until = until self.load_debug_replay = args_util.get(args, "load_debug_replay") @@ -603,7 +621,9 @@ def iterate( def handle_until(context: IterationContext): index, success = context.iteration_info["iteration"], context.success if isinstance(self.until, str): - if (self.until == "good" and success) or (self.until == "bad" and not success): + if (self.until == "good" and success) or ( + self.until == "bad" and not success + ): self.arg_groups[IterativeDebugArgs].stop_iteration() elif index >= self.until: self.arg_groups[IterativeDebugArgs].stop_iteration() @@ -638,10 +658,14 @@ def func(): for index in range(MAX_COUNT): - context = IterationContext(state={}, 
iteration_info={"iteration": index}, success=True) + context = IterationContext( + state={}, iteration_info={"iteration": index}, success=True + ) with contextlib.ExitStack() as stack, G_LOGGER.indent(): - remaining = get_remaining_func() if get_remaining_func is not None else None + remaining = ( + get_remaining_func() if get_remaining_func is not None else None + ) G_LOGGER.start( f"RUNNING | Iteration {index + 1}{f' | Approximately {remaining} iteration(s) remaining' if remaining is not None else ''}" @@ -653,9 +677,13 @@ def log_status(iter_success, start_time): duration_in_sec = time.time() - start_time if iter_success: num_passed += 1 - G_LOGGER.finish(f"PASSED | Iteration {index + 1} | Duration {duration_in_sec}s") + G_LOGGER.finish( + f"PASSED | Iteration {index + 1} | Duration {duration_in_sec}s" + ) else: - G_LOGGER.error(f"FAILED | Iteration {index + 1} | Duration {duration_in_sec}s") + G_LOGGER.error( + f"FAILED | Iteration {index + 1} | Duration {duration_in_sec}s" + ) # We must include the suffix in the debug replay key to disambiguate debug_replay_key = f"_N{index}" + (("_" + suffix) if suffix else "") @@ -663,7 +691,9 @@ def log_status(iter_success, start_time): start_time = time.time() if debug_replay_key in debug_replay: context = debug_replay[debug_replay_key] - G_LOGGER.info(f"Loading iteration information from debug replay: success={context.success}") + G_LOGGER.info( + f"Loading iteration information from debug replay: success={context.success}" + ) else: # Ensure that the intermediate artifact will be removed at the end of the iteration if requested. 
if self.iter_artifact_path and self.remove_intermediate: @@ -699,8 +729,12 @@ def save_replay(replay, description=None, suffix=None): save_replay(debug_replay, suffix="_skip_current") if do_check: - context.success = self.arg_groups[CheckCmdArgs].run_check(self.iter_artifact_path) - self.arg_groups[ArtifactSortArgs].sort_artifacts(context.success, suffix=debug_replay_key) + context.success = self.arg_groups[CheckCmdArgs].run_check( + self.iter_artifact_path + ) + self.arg_groups[ArtifactSortArgs].sort_artifacts( + context.success, suffix=debug_replay_key + ) debug_replay[debug_replay_key] = context save_replay(debug_replay, "debug replay") diff --git a/tools/Polygraphy/polygraphy/tools/debug/subtool/precision.py b/tools/Polygraphy/polygraphy/tools/debug/subtool/precision.py index 336ee9ec..2a8588eb 100644 --- a/tools/Polygraphy/polygraphy/tools/debug/subtool/precision.py +++ b/tools/Polygraphy/polygraphy/tools/debug/subtool/precision.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,14 +31,20 @@ def __init__(self, max_layers, direction, num_layers_to_mark): self.max_layers = max_layers self.direction = direction self.num_layers_to_mark = num_layers_to_mark - self.good = max_layers + 1 # Pretend marking all the layers gives us good accuracy. + self.good = ( + max_layers + 1 + ) # Pretend marking all the layers gives us good accuracy. 
def select_layers(self): if self.direction == "forward": - G_LOGGER.info(f"Selecting first {self.num_layers_to_mark} layer(s) to run in higher precision") + G_LOGGER.info( + f"Selecting first {self.num_layers_to_mark} layer(s) to run in higher precision" + ) return range(0, self.num_layers_to_mark) else: - G_LOGGER.info(f"Selecting last {self.num_layers_to_mark} layer(s) to run in higher precision") + G_LOGGER.info( + f"Selecting last {self.num_layers_to_mark} layer(s) to run in higher precision" + ) return range(self.max_layers - self.num_layers_to_mark, self.max_layers) def success_message(self): @@ -73,13 +79,17 @@ def step(self, success): # then we already have the information we need. if abs(self.good - self.bad) <= 1: if self.good >= self.max_layers: - G_LOGGER.error("Could not find a configuration that satisfied accuracy requirements.") + G_LOGGER.error( + "Could not find a configuration that satisfied accuracy requirements." + ) else: self.success_message() return True if self.num_layers_to_mark > self.max_layers: - G_LOGGER.error("Could not find a configuration that satisfied accuracy requirements.") + G_LOGGER.error( + "Could not find a configuration that satisfied accuracy requirements." + ) return True return False @@ -102,7 +112,9 @@ def step(self, success): return True if self.num_layers_to_mark > self.max_layers: - G_LOGGER.error("Could not find a configuration that satisfied accuracy requirements.") + G_LOGGER.error( + "Could not find a configuration that satisfied accuracy requirements." 
+ ) return True return False @@ -122,7 +134,11 @@ class Precision(BaseCheckerSubtool): """ def __init__(self): - super().__init__("precision", precision_constraints_default="obey", allow_no_artifacts_warning=False) + super().__init__( + "precision", + precision_constraints_default="obey", + allow_no_artifacts_warning=False, + ) def add_parser_args(self, parser): parser.add_argument( @@ -151,7 +167,9 @@ def add_parser_args(self, parser): ) def setup(self, args, network): - self.precision = {"float32": trt.float32, "float16": trt.float16}[args.precision] + self.precision = {"float32": trt.float32, "float16": trt.float16}[ + args.precision + ] if self.precision == trt.float16 and not self.arg_groups[TrtConfigArgs].fp16: G_LOGGER.critical( @@ -172,7 +190,9 @@ def setup(self, args, network): self.arg_groups[TrtConfigArgs].int8, ] ): - G_LOGGER.critical("Please enable at least one precision besides float32 (e.g. --int8, --fp16, --tf32)") + G_LOGGER.critical( + "Please enable at least one precision besides float32 (e.g. --int8, --fp16, --tf32)" + ) if self.arg_groups[ModelArgs].model_type == "engine": G_LOGGER.critical( @@ -194,7 +214,11 @@ def setup(self, args, network): def mark_layers(self, network, indices): EXCLUDE_LAYER_NAMES = ["CONSTANT"] - EXCLUDE_LAYERS = [getattr(trt.LayerType, attr) for attr in EXCLUDE_LAYER_NAMES if hasattr(trt.LayerType, attr)] + EXCLUDE_LAYERS = [ + getattr(trt.LayerType, attr) + for attr in EXCLUDE_LAYER_NAMES + if hasattr(trt.LayerType, attr) + ] # First, reset, since changes from the previous call will persist. 
for index, layer in enumerate(network): @@ -209,20 +233,30 @@ def mark_layers(self, network, indices): def should_exclude(): has_non_execution_output = any( - not layer.get_output(i).is_execution_tensor for i in range(layer.num_outputs) + not layer.get_output(i).is_execution_tensor + for i in range(layer.num_outputs) ) has_non_activation_output = any( - layer.get_output(i).dtype not in [trt.float32, trt.float16, trt.int8] + layer.get_output(i).dtype + not in [trt.float32, trt.float16, trt.int8] for i in range(layer.num_outputs) ) - return layer.type in EXCLUDE_LAYERS or has_non_execution_output or has_non_activation_output + return ( + layer.type in EXCLUDE_LAYERS + or has_non_execution_output + or has_non_activation_output + ) if not should_exclude(): - G_LOGGER.extra_verbose(f"Running layer in higher precision: {trt_util.str_from_layer(layer, index)}") + G_LOGGER.extra_verbose( + f"Running layer in higher precision: {trt_util.str_from_layer(layer, index)}" + ) layer.precision = self.precision marked_indices.add(index) - G_LOGGER.verbose(f"Marking layer(s): {marked_indices} to run in {self.precision} precision") + G_LOGGER.verbose( + f"Marking layer(s): {marked_indices} to run in {self.precision} precision" + ) def process_network(self, network): indices = list(self.layer_marker.select_layers()) diff --git a/tools/Polygraphy/polygraphy/tools/debug/subtool/reduce.py b/tools/Polygraphy/polygraphy/tools/debug/subtool/reduce.py index 31184920..8126364f 100644 --- a/tools/Polygraphy/polygraphy/tools/debug/subtool/reduce.py +++ b/tools/Polygraphy/polygraphy/tools/debug/subtool/reduce.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,9 +23,19 @@ from polygraphy.datatype import DataType from polygraphy.logger import G_LOGGER, LogMode from polygraphy.tools import util as tools_util -from polygraphy.tools.args import DataLoaderArgs, ModelArgs, OnnxInferShapesArgs, OnnxLoadArgs, OnnxSaveArgs +from polygraphy.tools.args import ( + DataLoaderArgs, + ModelArgs, + OnnxInferShapesArgs, + OnnxLoadArgs, + OnnxSaveArgs, +) from polygraphy.tools.base import Tool -from polygraphy.tools.debug.subtool.iterative_debug_args import ArtifactSortArgs, CheckCmdArgs, IterativeDebugArgs +from polygraphy.tools.debug.subtool.iterative_debug_args import ( + ArtifactSortArgs, + CheckCmdArgs, + IterativeDebugArgs, +) gs = mod.lazy_import("onnx_graphsurgeon>=0.3.6") onnx_backend = mod.lazy_import("polygraphy.backend.onnx") @@ -68,7 +78,9 @@ def finish(self): # Find the index of the node that has the highest number of nodes less than _least_bad_nodes, but still is successful. 
# Failing that, use the smallest possible subgraph (which will always be > _least_bad_nodes) def split_good(cond): - return {num: idx for num, idx in self._good_node_indices.items() if cond(num)} + return { + num: idx for num, idx in self._good_node_indices.items() if cond(num) + } max_smaller_graph = split_good(lambda num: num < self._least_bad_nodes) min_larger_graph = split_good(lambda num: num >= self._least_bad_nodes) @@ -174,7 +186,11 @@ def get_subscriptions_impl(self): CheckCmdArgs(), ArtifactSortArgs(allow_no_artifacts_warning=False), IterativeDebugArgs(iter_art_opt_default="polygraphy_debug.onnx"), - ModelArgs(model_opt_required=True, input_shapes_opt_name="model-inputs", required_model_type="onnx"), + ModelArgs( + model_opt_required=True, + input_shapes_opt_name="model-inputs", + required_model_type="onnx", + ), OnnxSaveArgs(), OnnxInferShapesArgs(default=True, allow_force_fallback=True), OnnxLoadArgs(outputs_opt_prefix=False), @@ -235,7 +251,9 @@ def run_impl(self, args): user_input_metadata = self.arg_groups[ModelArgs].input_shapes if user_input_metadata: model = gs.export_onnx( - tools_util.override_input_shapes(onnx_backend.gs_from_onnx(model), user_input_metadata) + tools_util.override_input_shapes( + onnx_backend.gs_from_onnx(model), user_input_metadata + ) ) model = self.arg_groups[OnnxInferShapesArgs].infer_shapes(model) @@ -252,7 +270,10 @@ def run_impl(self, args): def load_tensors_from_fallback(names): nonlocal fallback_outputs, fallback_metadata - if all((name in fallback_metadata and name in fallback_outputs) for name in names): + if all( + (name in fallback_metadata and name in fallback_outputs) + for name in names + ): return G_LOGGER.info( @@ -260,12 +281,16 @@ def load_tensors_from_fallback(names): "This will cause intermediate models to have static shapes." 
) with G_LOGGER.indent(): - new_outputs, new_meta = self.arg_groups[OnnxInferShapesArgs].fallback_inference(model, outputs=names) + new_outputs, new_meta = self.arg_groups[ + OnnxInferShapesArgs + ].fallback_inference(model, outputs=names) fallback_outputs.update(new_outputs) fallback_metadata.update(new_meta) if self.arg_groups[OnnxInferShapesArgs].force_fallback: - G_LOGGER.info("Freezing shapes in the model according to values determined by fallback shape inference") + G_LOGGER.info( + "Freezing shapes in the model according to values determined by fallback shape inference" + ) load_tensors_from_fallback(constants.MARK_ALL) onnx_util.set_shapes_from_layerwise_meta(GRAPH, fallback_metadata) @@ -275,7 +300,10 @@ def load_tensors_from_fallback(names): "You may want to provide input shapes to `debug reduce` using the " "`--model-input-shapes` option to prevent unexpected behavior.\n" ) - elif any(tensor.shape is None or util.is_shape_dynamic(tensor.shape) for tensor in GRAPH.tensors().values()): + elif any( + tensor.shape is None or util.is_shape_dynamic(tensor.shape) + for tensor in GRAPH.tensors().values() + ): msg = "" if self.arg_groups[OnnxInferShapesArgs].do_shape_inference: msg += "ONNX shape inference was unable to infer some shapes in this model.\n" @@ -309,7 +337,11 @@ def fix_graph(graph): """ def get_tensor_names_needing_fallback(tensors, fix_shape=True): - return [tensor.name for tensor in tensors if (not tensor.shape and fix_shape) or not tensor.dtype] + return [ + tensor.name + for tensor in tensors + if (not tensor.shape and fix_shape) or not tensor.dtype + ] load_tensors_from_fallback( get_tensor_names_needing_fallback(graph.inputs) @@ -320,9 +352,14 @@ def fix_tensor_metadata(tensors): for tensor in tensors: # If a tensor is not in `fallback_metadata`, it means it doesn't require metadata to be updated. 
if tensor.name in fallback_metadata: - tensor.shape = tensor.shape or fallback_metadata[tensor.name].shape + tensor.shape = ( + tensor.shape or fallback_metadata[tensor.name].shape + ) tensor.dtype = DataType.to_dtype( - DataType.from_dtype(tensor.dtype or fallback_metadata[tensor.name].dtype), "onnx" + DataType.from_dtype( + tensor.dtype or fallback_metadata[tensor.name].dtype + ), + "onnx", ) fix_tensor_metadata(graph.inputs) @@ -334,10 +371,17 @@ def fix_tensor_metadata(tensors): tensor_map = graph.tensors() tensors_to_freeze = [] # Names of tensors we need to freeze in the model. for name, tensor in tensor_map.items(): - if isinstance(tensor, gs.Variable) and not tensor.inputs and tensor not in graph.inputs: + if ( + isinstance(tensor, gs.Variable) + and not tensor.inputs + and tensor not in graph.inputs + ): tensors_to_freeze.append(name) - if tensors_to_freeze and self.arg_groups[DataLoaderArgs].is_using_random_data(): + if ( + tensors_to_freeze + and self.arg_groups[DataLoaderArgs].is_using_random_data() + ): G_LOGGER.warning( "This model includes multiple branches/paths. In order to continue reducing, one branch needs to be folded away.\n" "Please ensure that you have provided a data loader argument directly to `debug reduce` (i.e. prior to `--check`) " @@ -425,7 +469,9 @@ def bisect_io(graph, marker, attr, filter_const=True, debug_replay=None): G_LOGGER.start(f"Reducing model {attr}") def make_iter_art(context): - iter_graph = graph.copy() # This is a very light-weight copy of the entire graph. + iter_graph = ( + graph.copy() + ) # This is a very light-weight copy of the entire graph. 
with G_LOGGER.indent(): io_list = list(getattr(iter_graph.nodes[marker.node_index], attr)) @@ -459,7 +505,11 @@ def get_io(index): return names_from_tensors(getattr(graph, attr)) return names_from_tensors(list(getattr(graph.nodes[index], attr))) - return get_io(marker.best_bad_node_index), get_io(marker.best_good_node_index), debug_replay + return ( + get_io(marker.best_bad_node_index), + get_io(marker.best_good_node_index), + debug_replay, + ) # We reduce the model in 2 phases: # 1. Find the earliest output nodes that cause a failure. @@ -479,9 +529,15 @@ def get_io(index): if args.reduce_outputs: out_marker = MarkerType(len(bad_graph.nodes)) bad_outputs, good_outputs, debug_replay = bisect_io( - bad_graph, out_marker, attr="outputs", filter_const=False, debug_replay=debug_replay + bad_graph, + out_marker, + attr="outputs", + filter_const=False, + debug_replay=debug_replay, + ) + bad_graph = cleanup( + mark_io(bad_graph, "outputs", lookup_tensors(bad_graph, bad_outputs)) ) - bad_graph = cleanup(mark_io(bad_graph, "outputs", lookup_tensors(bad_graph, bad_outputs))) if good_graph is not None: good_graph = mark_io( good_graph, "outputs", lookup_tensors(good_graph, good_outputs) @@ -494,7 +550,9 @@ def get_io(index): bad_inputs, good_inputs, debug_replay = bisect_io( bad_graph, in_marker, attr="inputs", debug_replay=debug_replay ) - bad_graph = cleanup(mark_io(bad_graph, "inputs", lookup_tensors(bad_graph, bad_inputs))) + bad_graph = cleanup( + mark_io(bad_graph, "inputs", lookup_tensors(bad_graph, bad_inputs)) + ) if good_graph is not None: good_graph = mark_io( good_graph, "inputs", lookup_tensors(good_graph, good_inputs) @@ -516,7 +574,9 @@ def get_io(index): f"It looks like this model could potentially be reduced further.\nYou may want to reduce {self.arg_groups[OnnxSaveArgs].path} again using --mode=linear. 
" ) - G_LOGGER.info(f"Minimum Bad Model:\n{onnx_util.str_from_onnx(reduced_model)}\n\n") + G_LOGGER.info( + f"Minimum Bad Model:\n{onnx_util.str_from_onnx(reduced_model)}\n\n" + ) self.arg_groups[OnnxSaveArgs].save_onnx(reduced_model) # == Write Good Model == @@ -528,5 +588,7 @@ def get_io(index): "Could not find a minimal model close in size to the reduced model that does not cause a failure." ) else: - G_LOGGER.info(f"Minimum Good Model:\n{onnx_util.str_from_onnx(min_good_model)}\n\n") + G_LOGGER.info( + f"Minimum Good Model:\n{onnx_util.str_from_onnx(min_good_model)}\n\n" + ) self.arg_groups[OnnxSaveArgs].save_onnx(min_good_model, args.min_good) diff --git a/tools/Polygraphy/polygraphy/tools/debug/subtool/repeat.py b/tools/Polygraphy/polygraphy/tools/debug/subtool/repeat.py index 9336a823..db024e18 100644 --- a/tools/Polygraphy/polygraphy/tools/debug/subtool/repeat.py +++ b/tools/Polygraphy/polygraphy/tools/debug/subtool/repeat.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,7 +16,11 @@ # from polygraphy.tools.base import Tool -from polygraphy.tools.debug.subtool.iterative_debug_args import ArtifactSortArgs, CheckCmdArgs, IterativeDebugArgs +from polygraphy.tools.debug.subtool.iterative_debug_args import ( + ArtifactSortArgs, + CheckCmdArgs, + IterativeDebugArgs, +) class Repeat(Tool): @@ -29,7 +33,11 @@ def __init__(self): super().__init__("repeat") def get_subscriptions_impl(self): - return [CheckCmdArgs(), ArtifactSortArgs(), IterativeDebugArgs(allow_iter_art_opt=False, allow_until_opt=True)] + return [ + CheckCmdArgs(), + ArtifactSortArgs(), + IterativeDebugArgs(allow_iter_art_opt=False, allow_until_opt=True), + ] def show_start_end_logging_impl(self, args): return True diff --git a/tools/Polygraphy/polygraphy/tools/inspect/inspect.py b/tools/Polygraphy/polygraphy/tools/inspect/inspect.py index daa8fdc1..3a6d66be 100644 --- a/tools/Polygraphy/polygraphy/tools/inspect/inspect.py +++ b/tools/Polygraphy/polygraphy/tools/inspect/inspect.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,7 +15,14 @@ # limitations under the License. 
# from polygraphy.tools.base import Tool -from polygraphy.tools.inspect.subtool import Data, Model, Tactics, Capability, DiffTactics, Sparsity +from polygraphy.tools.inspect.subtool import ( + Data, + Model, + Tactics, + Capability, + DiffTactics, + Sparsity, +) class Inspect(Tool): diff --git a/tools/Polygraphy/polygraphy/tools/inspect/subtool/capability.py b/tools/Polygraphy/polygraphy/tools/inspect/subtool/capability.py index 7f4aa9de..c41697ba 100644 --- a/tools/Polygraphy/polygraphy/tools/inspect/subtool/capability.py +++ b/tools/Polygraphy/polygraphy/tools/inspect/subtool/capability.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,7 +20,12 @@ from polygraphy import mod from polygraphy.common.interface import TypedDict from polygraphy.logger import G_LOGGER -from polygraphy.tools.args import OnnxInferShapesArgs, OnnxLoadArgs, ModelArgs, OnnxSaveArgs +from polygraphy.tools.args import ( + OnnxInferShapesArgs, + OnnxLoadArgs, + ModelArgs, + OnnxSaveArgs, +) from polygraphy.tools.base import Tool common_backend = mod.lazy_import("polygraphy.backend.common") @@ -85,7 +90,9 @@ def supports_model(path): except AttributeError: trt_util.fail_unavailable("supports_model in tensorrt.OnnxParser") - supported, nodelists = parser.supports_model(common_backend.bytes_from_path(path), path) + supported, nodelists = parser.supports_model( + common_backend.bytes_from_path(path), path + ) return supported, nodelists, parser @@ -133,16 +140,24 @@ def save_subgraph(onnx_save_args, graph, start, end, prefix="", use_tmp_file=Fal in_dict = {inp.name: inp for node in subgraph_nodes for inp in node.inputs} # Guess graph inputs/outputs by checking all output tensor names against all input tensor 
names, and vice-versa. - subgraph_inputs = onnx_util.meta_from_gs_tensors([in_dict[k] for k in in_dict if k not in out_dict]) - subgraph_outputs = onnx_util.meta_from_gs_tensors([out_dict[k] for k in out_dict if k not in in_dict]) + subgraph_inputs = onnx_util.meta_from_gs_tensors( + [in_dict[k] for k in in_dict if k not in out_dict] + ) + subgraph_outputs = onnx_util.meta_from_gs_tensors( + [out_dict[k] for k in out_dict if k not in in_dict] + ) - subgraph = gs.export_onnx(onnx_backend.extract_subgraph(graph, subgraph_inputs, subgraph_outputs)) + subgraph = gs.export_onnx( + onnx_backend.extract_subgraph(graph, subgraph_inputs, subgraph_outputs) + ) if use_tmp_file: path = util.NamedTemporaryFile(prefix=prefix, suffix=".onnx").name else: # end is exclusive, so subtract one to make the model names friendlier. - path = os.path.join(onnx_save_args.path, f"{prefix}_subgraph-nodes-{start}-{end - 1}.onnx") + path = os.path.join( + onnx_save_args.path, f"{prefix}_subgraph-nodes-{start}-{end - 1}.onnx" + ) onnx_save_args.save_onnx(subgraph, path) return path @@ -159,11 +174,17 @@ def gen_results_summary(final_unsupported): str: A summary of all the unsupported ops in model, along with reasons and node index ranges. 
""" op_width = max(map(len, list(final_unsupported.keys()) + ["Operator "])) - reason_width = max(len(reason) for node_index_map in final_unsupported.values() for reason in node_index_map.keys()) + reason_width = max( + len(reason) + for node_index_map in final_unsupported.values() + for reason in node_index_map.keys() + ) summary = "===== Summary =====\n" - header = f"{'Operator':{op_width}}| {'Count':7} | {'Reason':{reason_width}} | Nodes\n" + header = ( + f"{'Operator':{op_width}}| {'Count':7} | {'Reason':{reason_width}} | Nodes\n" + ) summary += header + "-" * len(header) + "\n" for op, node_index_map in final_unsupported.items(): @@ -183,10 +204,30 @@ def gen_results_summary_no_partitioning(stack_trace_to_errors): Returns: str: A summary of all the unsupported ops in model, along with reasons and stack traces. """ - stack_trace_width = max(map(len, list(stack_trace_to_errors.keys()) + ["Stack trace "])) - op_width = max(max(len(op) for errors_per_stack in stack_trace_to_errors.values() for op, _, _ in errors_per_stack), len("Operator ")) - node_width = max(max(len(node) for errors_per_stack in stack_trace_to_errors.values() for _, node, _ in errors_per_stack), len("Node ")) - reason_width = max(len(reason) for errors_per_stack in stack_trace_to_errors.values() for _, _, reason in errors_per_stack) + stack_trace_width = max( + map(len, list(stack_trace_to_errors.keys()) + ["Stack trace "]) + ) + op_width = max( + max( + len(op) + for errors_per_stack in stack_trace_to_errors.values() + for op, _, _ in errors_per_stack + ), + len("Operator "), + ) + node_width = max( + max( + len(node) + for errors_per_stack in stack_trace_to_errors.values() + for _, node, _ in errors_per_stack + ), + len("Node "), + ) + reason_width = max( + len(reason) + for errors_per_stack in stack_trace_to_errors.values() + for _, _, reason in errors_per_stack + ) summary = "===== Summary =====\n" @@ -210,12 +251,19 @@ def __init__(self): def get_subscriptions_impl(self): return [ - 
ModelArgs(model_opt_required=True, input_shapes_opt_name=False, required_model_type="onnx"), + ModelArgs( + model_opt_required=True, + input_shapes_opt_name=False, + required_model_type="onnx", + ), OnnxInferShapesArgs(), OnnxLoadArgs(outputs_opt_prefix=False), - OnnxSaveArgs(output_default_path="polygraphy_capability_dumps", allow_multiple_models=True), + OnnxSaveArgs( + output_default_path="polygraphy_capability_dumps", + allow_multiple_models=True, + ), ] - + def add_parser_args_impl(self, parser): parser.add_argument( "--with-partitioning", @@ -232,9 +280,11 @@ def run_impl(self, args): def no_partitioning_variant(self): supported, parser = parse(self.arg_groups[ModelArgs].path) if supported: - G_LOGGER.info("Graph is fully supported by TensorRT; Will not report errors.") + G_LOGGER.info( + "Graph is fully supported by TensorRT; Will not report errors." + ) return - + stack_trace_to_errors = OrderedDict() for err_idx in range(parser.num_errors): parser_error = parser.get_error(err_idx) @@ -244,29 +294,38 @@ def no_partitioning_variant(self): stack_trace += parser_error.local_function_stack()[function_idx] if function_idx != parser_error.local_function_stack_size() - 1: stack_trace += " -> " - + if stack_trace not in stack_trace_to_errors: stack_trace_to_errors[stack_trace] = [] node_operator = parser_error.node_operator() node_name = parser_error.node_name() parser_error_desc = str(parser_error) - stack_trace_to_errors[stack_trace].append(tuple((node_operator, node_name, parser_error_desc))) - + stack_trace_to_errors[stack_trace].append( + tuple((node_operator, node_name, parser_error_desc)) + ) + summary = gen_results_summary_no_partitioning(stack_trace_to_errors) G_LOGGER.info(summary) util.save_file( - summary, os.path.join(self.arg_groups[OnnxSaveArgs].path, "results.txt"), "w", description="results" + summary, + os.path.join(self.arg_groups[OnnxSaveArgs].path, "results.txt"), + "w", + description="results", ) def supports_model_variant(self): supported, 
nodelists, _ = supports_model(self.arg_groups[ModelArgs].path) if supported: - G_LOGGER.info("Graph is fully supported by TensorRT; Will not generate subgraphs.") + G_LOGGER.info( + "Graph is fully supported by TensorRT; Will not generate subgraphs." + ) return - parent_graph = onnx_backend.gs_from_onnx(self.arg_groups[OnnxLoadArgs].load_onnx()) + parent_graph = onnx_backend.gs_from_onnx( + self.arg_groups[OnnxLoadArgs].load_onnx() + ) def partition(nodelists, offset): """ @@ -284,7 +343,9 @@ def partition(nodelists, offset): supported_subgraphs = [] for node_indices, supported in nodelists: if supported: - supported_subgraphs.append([index + offset for index in node_indices]) + supported_subgraphs.append( + [index + offset for index in node_indices] + ) continue start = node_indices[0] + offset @@ -315,13 +376,23 @@ def save_unsupported_graph(start, end): start (int): The (inclusive) index of the start node. end (int): The (exclusive) index of the end node. """ - subgraph_path = save_subgraph(self.arg_groups[OnnxSaveArgs], parent_graph, start, end, "unsupported") + subgraph_path = save_subgraph( + self.arg_groups[OnnxSaveArgs], parent_graph, start, end, "unsupported" + ) _, _, parser = supports_model(subgraph_path) err_string = ( - " | ".join([str(parser.get_error(err_idx)) for err_idx in range(parser.num_errors)]) or "UNKNOWN ERROR" + " | ".join( + [ + str(parser.get_error(err_idx)) + for err_idx in range(parser.num_errors) + ] + ) + or "UNKNOWN ERROR" + ) + unsupported_node_dict.add( + parent_graph.nodes[start].op, err_string, [start, end] ) - unsupported_node_dict.add(parent_graph.nodes[start].op, err_string, [start, end]) # Log errors for all the unsupported graphs between supported subgraphs. 
for index, subg_node_idxs in enumerate(supported_subgraphs): @@ -336,7 +407,10 @@ def save_unsupported_graph(start, end): if index == 0 and subg_node_idxs[0] != 0: save_unsupported_graph(0, subg_node_idxs[0]) - if index == len(supported_subgraphs) - 1 and supported_subgraphs[-1][-1] != len(parent_graph.nodes) - 1: + if ( + index == len(supported_subgraphs) - 1 + and supported_subgraphs[-1][-1] != len(parent_graph.nodes) - 1 + ): save_unsupported_graph(subg_node_idxs[-1] + 1, len(parent_graph.nodes)) if index < len(supported_subgraphs) - 1: @@ -347,5 +421,8 @@ def save_unsupported_graph(start, end): G_LOGGER.info(summary) util.save_file( - summary, os.path.join(self.arg_groups[OnnxSaveArgs].path, "results.txt"), "w", description="results" + summary, + os.path.join(self.arg_groups[OnnxSaveArgs].path, "results.txt"), + "w", + description="results", ) diff --git a/tools/Polygraphy/polygraphy/tools/inspect/subtool/data.py b/tools/Polygraphy/polygraphy/tools/inspect/subtool/data.py index 651ed83c..af145769 100644 --- a/tools/Polygraphy/polygraphy/tools/inspect/subtool/data.py +++ b/tools/Polygraphy/polygraphy/tools/inspect/subtool/data.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -37,7 +37,10 @@ def __init__(self): super().__init__("data") def add_parser_args(self, parser): - parser.add_argument("path", help="Path to a file containing input or output data from Polygraphy") + parser.add_argument( + "path", + help="Path to a file containing input or output data from Polygraphy", + ) parser.add_argument( "-a", "--all", @@ -45,9 +48,16 @@ def add_parser_args(self, parser): action="store_true", ) parser.add_argument( - "-s", "--show-values", help="Show values of the tensors instead of just metadata", action="store_true" + "-s", + "--show-values", + help="Show values of the tensors instead of just metadata", + action="store_true", + ) + parser.add_argument( + "--histogram", + help="Show a histogram of the value distribution", + action="store_true", ) - parser.add_argument("--histogram", help="Show a histogram of the value distribution", action="store_true") parser.add_argument( "-n", "--num-items", @@ -88,7 +98,9 @@ def str_from_iters(iters): iter_meta = meta_from_iter_result(iter_result) indent = 1 if len(iters) > 1 and args.all: - out_str += util.indent_block(f"\n-- Iteration: {index}\n", indent - 1) + out_str += util.indent_block( + f"\n-- Iteration: {index}\n", indent - 1 + ) indent = 2 for name, arr in iter_result.items(): diff --git a/tools/Polygraphy/polygraphy/tools/inspect/subtool/diff_tactics.py b/tools/Polygraphy/polygraphy/tools/inspect/subtool/diff_tactics.py index a4af2489..4f5fc1ef 100644 --- a/tools/Polygraphy/polygraphy/tools/inspect/subtool/diff_tactics.py +++ b/tools/Polygraphy/polygraphy/tools/inspect/subtool/diff_tactics.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -54,7 +54,9 @@ def add_parser_args(self, parser): def run_impl(self, args): if args.dir is None and (args.good is None or args.bad is None): - G_LOGGER.critical("Either `--dir`, or both `--good` and `--bad` must be specified.") + G_LOGGER.critical( + "Either `--dir`, or both `--good` and `--bad` must be specified." + ) def load_tactics(dirpath): """ @@ -77,12 +79,16 @@ def try_load_replay(path): tactics = defaultdict(set) replay_paths = [] search_paths = ( - glob.iglob(os.path.join(dirpath, "**"), recursive=True) if os.path.isdir(dirpath) else [dirpath] + glob.iglob(os.path.join(dirpath, "**"), recursive=True) + if os.path.isdir(dirpath) + else [dirpath] ) for path in search_paths: replay = try_load_replay(path) if replay is None: - G_LOGGER.verbose(f"{path} does not look like a tactic replay file, skipping.") + G_LOGGER.verbose( + f"{path} does not look like a tactic replay file, skipping." + ) continue replay_paths.append(path) @@ -113,7 +119,9 @@ def try_load_replay(path): G_LOGGER.info("Found potentially bad tactics:") for name, algo_set in potential_bad_tactics.items(): algo_set_str = list(map(str, algo_set)) - G_LOGGER.info(f"Layer: {name}\n{constants.TAB}Algorithms: {algo_set_str}") + G_LOGGER.info( + f"Layer: {name}\n{constants.TAB}Algorithms: {algo_set_str}" + ) else: G_LOGGER.info( "Could not determine potentially bad tactics. Try providing more tactic replay files if possible." diff --git a/tools/Polygraphy/polygraphy/tools/inspect/subtool/model.py b/tools/Polygraphy/polygraphy/tools/inspect/subtool/model.py index d4489d9a..9fe01241 100644 --- a/tools/Polygraphy/polygraphy/tools/inspect/subtool/model.py +++ b/tools/Polygraphy/polygraphy/tools/inspect/subtool/model.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -88,7 +88,7 @@ def add_parser_args_impl(self, parser): """, action="store_true", default=None, - dest="show_unbounded_dds" + dest="show_unbounded_dds", ) def run_impl(self, args): @@ -100,37 +100,52 @@ def inspect_trt(): with self.arg_groups[TrtLoadEngineArgs].load_engine() as engine: context = engine.create_execution_context() engine_str = trt_util.str_from_engine( - engine, context, show_layers=show("layers"), show_attrs=show("attrs") + engine, + context, + show_layers=show("layers"), + show_attrs=show("attrs"), ) G_LOGGER.info(f"==== TensorRT Engine ====\n{engine_str}") else: - builder, network, parser = util.unpack_args(self.arg_groups[TrtLoadNetworkArgs].load_network(), 3) + builder, network, parser = util.unpack_args( + self.arg_groups[TrtLoadNetworkArgs].load_network(), 3 + ) with contextlib.ExitStack() as stack: stack.enter_context(builder) stack.enter_context(network) if parser: stack.enter_context(parser) network_str = trt_util.str_from_network( - network, show_layers=show("layers"), show_attrs=show("attrs"), show_weights=show("weights") + network, + show_layers=show("layers"), + show_attrs=show("attrs"), + show_weights=show("weights"), ).strip() G_LOGGER.info(f"==== TensorRT Network ====\n{network_str}") def inspect_onnx(): onnx_model = self.arg_groups[OnnxLoadArgs].load_onnx() model_str = onnx_util.str_from_onnx( - onnx_model, show_layers=show("layers"), show_attrs=show("attrs"), show_weights=show("weights") + onnx_model, + show_layers=show("layers"), + show_attrs=show("attrs"), + show_weights=show("weights"), ).strip() G_LOGGER.info(f"==== ONNX Model ====\n{model_str}") if args.show_unbounded_dds: graph = onnx_backend.gs_from_onnx(onnx_model) unbounded_dds_tensors = onnx_util.get_unbounded_dds_tensors(graph) - G_LOGGER.info(f"Found tensors 
with unbounded DDS: {unbounded_dds_tensors}") - + G_LOGGER.info( + f"Found tensors with unbounded DDS: {unbounded_dds_tensors}" + ) def inspect_tf(): tf_graph, _ = self.arg_groups[TfLoadArgs].load_graph() graph_str = tf_util.str_from_graph( - tf_graph, show_layers=show("layers"), show_attrs=show("attrs"), show_weights=show("weights") + tf_graph, + show_layers=show("layers"), + show_attrs=show("attrs"), + show_weights=show("weights"), ).strip() G_LOGGER.info(f"==== TensorFlow Graph ====\n{graph_str}") @@ -142,5 +157,7 @@ def inspect_tf(): if self.arg_groups[ModelArgs].model_type.is_trt() or args.display_as == "trt": func = inspect_trt if func is None: - G_LOGGER.critical("Could not determine how to display this model. Maybe you need to specify --display-as?") + G_LOGGER.critical( + "Could not determine how to display this model. Maybe you need to specify --display-as?" + ) func() diff --git a/tools/Polygraphy/polygraphy/tools/inspect/subtool/sparsity.py b/tools/Polygraphy/polygraphy/tools/inspect/subtool/sparsity.py index 39197965..b1c99b0f 100644 --- a/tools/Polygraphy/polygraphy/tools/inspect/subtool/sparsity.py +++ b/tools/Polygraphy/polygraphy/tools/inspect/subtool/sparsity.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -32,8 +32,16 @@ def show_start_end_logging_impl(self, args): def get_subscriptions_impl(self): return [ - ModelArgs(model_opt_required=True, input_shapes_opt_name=False, required_model_type="onnx"), - OnnxLoadArgs(allow_shape_inference=False, outputs_opt_prefix=False, allow_from_tf=False), + ModelArgs( + model_opt_required=True, + input_shapes_opt_name=False, + required_model_type="onnx", + ), + OnnxLoadArgs( + allow_shape_inference=False, + outputs_opt_prefix=False, + allow_from_tf=False, + ), ] def run_impl(self, args): diff --git a/tools/Polygraphy/polygraphy/tools/inspect/subtool/tactics.py b/tools/Polygraphy/polygraphy/tools/inspect/subtool/tactics.py index d33b0978..8f3a71c6 100644 --- a/tools/Polygraphy/polygraphy/tools/inspect/subtool/tactics.py +++ b/tools/Polygraphy/polygraphy/tools/inspect/subtool/tactics.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/tools/plugin/plugin.py b/tools/Polygraphy/polygraphy/tools/plugin/plugin.py index 41d1b3a6..de4e44b0 100644 --- a/tools/Polygraphy/polygraphy/tools/plugin/plugin.py +++ b/tools/Polygraphy/polygraphy/tools/plugin/plugin.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/tools/plugin/subtool/list_plugins.py b/tools/Polygraphy/polygraphy/tools/plugin/subtool/list_plugins.py index 2b068fb2..442bb51a 100644 --- a/tools/Polygraphy/polygraphy/tools/plugin/subtool/list_plugins.py +++ b/tools/Polygraphy/polygraphy/tools/plugin/subtool/list_plugins.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,16 +21,14 @@ from polygraphy.tools.plugin.subtool.plugin_base import PluginBase + class ListPlugins(PluginBase): """ Analyze an onnx model for potential plugin substitutions. """ def __init__(self): - super().__init__("list") - + super().__init__(list_plugins=True, name="list") + def add_parser_args_impl(self, parser): super().add_parser_args_impl(parser) - - def run_impl(self, args): - super().match_plugin(args=args, list_plugins=True) diff --git a/tools/Polygraphy/polygraphy/tools/plugin/subtool/match.py b/tools/Polygraphy/polygraphy/tools/plugin/subtool/match.py index 0c7b0a2a..c9a501e0 100644 --- a/tools/Polygraphy/polygraphy/tools/plugin/subtool/match.py +++ b/tools/Polygraphy/polygraphy/tools/plugin/subtool/match.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -28,7 +28,7 @@ class Match(PluginBase): """ def __init__(self): - super().__init__("match") + super().__init__(list_plugins=False, name="match") def add_parser_args_impl(self, parser): super().add_parser_args_impl(parser) @@ -38,6 +38,3 @@ def add_parser_args_impl(self, parser): help="Full path where to save the intermediate file. Defaults to a file called config.yaml in the model directory.", required=False, ) - - def run_impl(self, args): - super().match_plugin(args=args, list_plugins=False) diff --git a/tools/Polygraphy/polygraphy/tools/plugin/subtool/plugin_base.py b/tools/Polygraphy/polygraphy/tools/plugin/subtool/plugin_base.py index b594f5c3..fdecf859 100644 --- a/tools/Polygraphy/polygraphy/tools/plugin/subtool/plugin_base.py +++ b/tools/Polygraphy/polygraphy/tools/plugin/subtool/plugin_base.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,7 +23,13 @@ from polygraphy import mod from polygraphy.logger import G_LOGGER from polygraphy.tools import Tool -from polygraphy.tools.args import DataLoaderArgs, OnnxLoadArgs, ModelArgs, OnnxInferShapesArgs +from polygraphy.tools.args import util as args_util +from polygraphy.tools.args import ( + DataLoaderArgs, + OnnxLoadArgs, + ModelArgs, + OnnxInferShapesArgs, +) import os # Your tool should lazily import any external dependencies. By doing so, @@ -36,15 +42,17 @@ onnx = mod.lazy_import("onnx") yaml = mod.lazy_import("yaml", pkg_name="pyyaml") + class PluginBase(Tool): """ Analyze an onnx model for potential plugin substitutions. 
""" - GRAPH_PATTERN_FILE_NAME="pattern.py" - def __init__(self, name=None): + GRAPH_PATTERN_FILE_NAME = "pattern.py" + + def __init__(self, list_plugins:bool, name=None): super().__init__(name) - self.plugin_dir = None + self.list_plugins = list_plugins def get_subscriptions_impl(self): return [ @@ -57,26 +65,51 @@ def get_subscriptions_impl(self): def add_parser_args_impl(self, parser): parser.add_argument("--plugin-dir", help="Plugin directory.", required=True) include_exclude = parser.add_mutually_exclusive_group() - include_exclude.add_argument("--include", help="Names of plugins to include. Format: `--include ...`", required=False, nargs="+", type=str, default=[]) - include_exclude.add_argument("--exclude", help="Names of plugins to exclude. Format: `--exclude ...`", required=False, nargs="+", type=str, default=[]) + include_exclude.add_argument( + "--include", + help="Names of plugins to include. Format: `--include ...`", + required=False, + nargs="+", + type=str, + default=[], + ) + include_exclude.add_argument( + "--exclude", + help="Names of plugins to exclude. 
Format: `--exclude ...`", + required=False, + nargs="+", + type=str, + default=[], + ) def run_impl(self, args): - raise NotImplementedError("run_impl() must be implemented by child classes") - - def match_plugin(self, args, list_plugins=False): - - self.plugin_dir = os.path.abspath(args.plugin_dir) - full_pattern = os.path.join(self.plugin_dir, "*", self.GRAPH_PATTERN_FILE_NAME) - - plugin_set = {os.path.basename(os.path.dirname(x)) for x in glob.glob(pathname=full_pattern, recursive=False)} - - if args.include: - plugin_set.intersection_update(set(args.include)) - - if args.exclude: - plugin_set.difference_update(set(args.exclude)) - - graph = gs.import_onnx(self.arg_groups[OnnxLoadArgs].load_onnx()) + self.match_plugin( + model_file=args.model_file, + plugin_dir=args.plugin_dir, + output_file=args_util.get(args,"output"), + include_list=args.include, + exclude_list=args.exclude, + list_plugins=self.list_plugins + ) + + def match_plugin(self, model_file, plugin_dir, output_file=None, include_list=None, exclude_list=None, list_plugins=False): + """ + find matching subgraphs based on plugin pattern + """ + + plugin_dir = os.path.abspath(plugin_dir) + full_pattern = os.path.join(plugin_dir, "*", self.GRAPH_PATTERN_FILE_NAME) + + plugin_set = { + os.path.basename(os.path.dirname(x)) + for x in glob.glob(pathname=full_pattern, recursive=False) + } + + if include_list: + plugin_set.intersection_update(set(include_list)) + + if exclude_list: + plugin_set.difference_update(set(exclude_list)) # list of plugin substitution instances (conent of config.yaml) out_yaml = [] @@ -87,45 +120,29 @@ def match_plugin(self, args, list_plugins=False): G_LOGGER.info(f"checking {plugin} in model") plugin_yaml = {} - #build pattern from plugin - plugin_pattern_loc = os.path.join(self.plugin_dir, plugin, self.GRAPH_PATTERN_FILE_NAME) - graph_pattern = common_backend.invoke_from_script(plugin_pattern_loc, "get_plugin_pattern") - - matched_subgraphs = graph_pattern.match_all(graph) - if 
matched_subgraphs: - plugin_frequency[plugin] += len(matched_subgraphs) - - plugin_yaml["name"] = plugin - plugin_yaml["instances"] = [] - - for sg in matched_subgraphs: - def get_names(tensors): - return [tensor.name for tensor in tensors] + plugin_pattern_loc = os.path.join(plugin_dir, plugin, self.GRAPH_PATTERN_FILE_NAME) + # create a new graph in every iteration, in case the pattern matching modifies the graph + graph = gs.import_onnx(self.arg_groups[OnnxLoadArgs].load_onnx()) if self.arg_groups else gs.import_onnx(onnx.load(model_file)) - inputs = get_names(sg.inputs) - outputs = get_names(sg.outputs) - attributes = common_backend.invoke_from_script(plugin_pattern_loc, "get_plugin_attributes", sg) - plugin_yaml["instances"].append({ - "inputs": inputs, - "outputs": outputs, - "attributes": attributes - }) + #get inputs, outputs, attributes from plugin + G_LOGGER.ultra_verbose(f"calling get_matching_subgraphs from {plugin_pattern_loc}") + ioattrs = common_backend.invoke_from_script(plugin_pattern_loc, "get_matching_subgraphs", graph) - out_yaml.append(plugin_yaml) + if ioattrs: + G_LOGGER.ultra_verbose("match found") + plugin_yaml["name"] = common_backend.invoke_from_script(plugin_pattern_loc, "get_plugin_metadata")['name'] + plugin_yaml["op"] = common_backend.invoke_from_script(plugin_pattern_loc, "get_plugin_metadata")['op'] + plugin_yaml["instances"] = ioattrs + out_yaml.append(plugin_yaml) + plugin_frequency[plugin] += len(ioattrs) + G_LOGGER.info("the following plugins matched:") + G_LOGGER.info(plugin_frequency) if list_plugins: - G_LOGGER.info("the following plugins would be used:") - G_LOGGER.info(plugin_frequency) return - config_yaml = os.path.join(os.path.dirname(args.model_file),"config.yaml") - if args.output: - config_yaml = args.output + config_yaml = output_file or os.path.abspath(os.path.join(os.path.dirname(model_file),"config.yaml")) with open(config_yaml, "w") as stream: - yaml.dump_all( - out_yaml, - stream, - default_flow_style=False, - 
sort_keys=False - ) + yaml.dump_all(out_yaml, stream, default_flow_style=False, sort_keys=False) + G_LOGGER.info(f"Matching subgraphs saved to {config_yaml}") diff --git a/tools/Polygraphy/polygraphy/tools/plugin/subtool/replace.py b/tools/Polygraphy/polygraphy/tools/plugin/subtool/replace.py index 73e95f14..54a578e6 100644 --- a/tools/Polygraphy/polygraphy/tools/plugin/subtool/replace.py +++ b/tools/Polygraphy/polygraphy/tools/plugin/subtool/replace.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -39,29 +39,42 @@ gs = mod.lazy_import("onnx_graphsurgeon>=0.5.0") onnx = mod.lazy_import("onnx") yaml = mod.lazy_import("yaml", pkg_name="pyyaml") +common_backend = mod.lazy_import("polygraphy.backend.common") - -def replace_with_plugin(graph, op, inputs, outputs, attrs=None): +def default_replace_with_plugin(graph, input_tensors: list, output_tensors: list, attrs=None, op=None): """ - replaces a subgraph with a plugin + replaces a subgraph (set of nodes) with a single plugin node + default method to be used when the plugin does not specify a custom replacement method """ - # Disconnect output nodes of all input tensors - for inp in inputs: - inp.outputs.clear() + def issubset_unhashable(list_a: list, list_b: list) -> bool: + """ + Return whether list_a is a subset (or equal to) list_b + The objects in list_a and list_b are unhashable, otherwise set(list_a) <= set(list_b) is enough + """ + return len(list_a) <= len(list_b) and all(a in list_b for a in list_a) + + # Disconnect those output nodes of the input tensors whose inputs are a subset of the input tensors + for in_tensor in input_tensors: + to_remove_nodes = [] + for out_node in in_tensor.outputs: + if 
issubset_unhashable(out_node.inputs, input_tensors): + to_remove_nodes.append(out_node) + for node in to_remove_nodes: + in_tensor.outputs.remove(node) # Disconnet input nodes of all output tensors - for out in outputs: - out.inputs.clear() - - # Insert the new node. - new_node = graph.layer(op=op, inputs=inputs, outputs=outputs, attrs=attrs) - - # Remove the now-dangling subgraph. + for out_tensor in output_tensors: + to_remove_nodes = [] + for in_node in out_tensor.inputs: + to_remove_nodes.append(in_node) + for node in to_remove_nodes: + out_tensor.inputs.remove(node) + + # Insert the new node + new_node = graph.layer(op=op, inputs=input_tensors, outputs=output_tensors, attrs=attrs) graph.cleanup().toposort() - - return new_node - + return new_node[0].inputs[0] class Replace(Tool): # Polygraphy will use the docstring of the tool child class to generate @@ -69,9 +82,10 @@ class Replace(Tool): """ Replace a subgraph in an onnx model with a plugin. """ + GRAPH_PATTERN_FILE_NAME="pattern.py" def __init__(self): - super().__init__("replace") + super().__init__(name="replace") def get_subscriptions_impl(self): return [ @@ -84,33 +98,58 @@ def get_subscriptions_impl(self): def add_parser_args_impl(self, parser): parser.add_argument("--plugin-dir", help="Plugin directory.", required=True) parser.add_argument( - "-o", "--output", help="Where to save the modified model", required=True + "-o", "--output", help="Where to save the modified model", required=False ) parser.add_argument("--config", help="location of config.yaml.") def run_impl(self, args): - graph = gs.import_onnx(self.arg_groups[OnnxLoadArgs].load_onnx()) - tmap = graph.tensors() - config_yaml = os.path.join(os.path.dirname(args.model_file), "config.yaml") - if args.config: - config_yaml = args.config - + self.replace_plugin( + model_file=args.model_file, + plugin_dir=args.plugin_dir, + output=args.output, + config=args.config + ) + def replace_plugin(self, model_file, plugin_dir, output=None, config=None): + 
graph = gs.import_onnx(self.arg_groups[OnnxLoadArgs].load_onnx()) if self.arg_groups else gs.import_onnx(onnx.load(model_file)) + + tensor_map = graph.tensors() + config_yaml = config or os.path.join(os.path.dirname(model_file), "config.yaml") + + plugin_dir = os.path.abspath(plugin_dir) + with open(config_yaml, "r") as stream: in_yaml = yaml.safe_load_all(stream) for plugin in in_yaml: plugin_name = plugin["name"] - for instance in plugin["instances"]: - inputs = [tmap[tensor_name] for tensor_name in instance["inputs"]] - outputs = [tmap[tensor_name] for tensor_name in instance["outputs"]] - attrs = instance["attributes"] - - replace_with_plugin( - graph=graph, - op=plugin_name, - inputs=inputs, - outputs=outputs, - attrs=attrs, + plugin_op = plugin["op"] + G_LOGGER.ultra_verbose(f"replacing {plugin_name}...") + plugin_pattern_loc = os.path.join(plugin_dir, plugin_name, self.GRAPH_PATTERN_FILE_NAME) + # if the plugin provides a custom replacement method, use that + replace_fn = default_replace_with_plugin + try: + replace_fn = mod.import_from_script( + plugin_pattern_loc, + "replace_with_plugin" ) + except: + pass - onnx.save(gs.export_onnx(graph), args.output) + replace_cnt = 0 + for instance in plugin["instances"]: + attrs = instance.get("attributes", None) + if replace_fn( + graph=graph, + input_tensors=[tensor_map[ip_tensor_name] for ip_tensor_name in instance["inputs"]], + output_tensors=[tensor_map[op_tensor_name] for op_tensor_name in instance["outputs"]], + attrs=attrs, + op=plugin_op + ): + replace_cnt += 1 + G_LOGGER.info(f"replaced {replace_cnt} instances of {plugin_name} plugin") + if replace_cnt != len(plugin['instances']): + G_LOGGER.warning(f"Warning: not all instances of {plugin_name} were replaced!") + + output_onnx = output or os.path.join(os.path.dirname(model_file), "replaced.onnx") + + onnx.save(gs.export_onnx(graph), output_onnx) diff --git a/tools/Polygraphy/polygraphy/tools/registry.py b/tools/Polygraphy/polygraphy/tools/registry.py index 
ed4597d4..efb655e5 100644 --- a/tools/Polygraphy/polygraphy/tools/registry.py +++ b/tools/Polygraphy/polygraphy/tools/registry.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -28,9 +28,7 @@ def __init__(self, name, err): self.err = err # NOTE: When modifying this error message, make sure to update the checks in # tests/test_dependencies.py so that we don't miss errors! - self.__doc__ = ( - f"[!] This tool could not be loaded due to an error:\n{self.err}\nRun 'polygraphy {self.name}' for details." - ) + self.__doc__ = f"[!] This tool could not be loaded due to an error:\n{self.err}\nRun 'polygraphy {self.name}' for details." def __call__(self, args): G_LOGGER.critical(f"Encountered an error when loading this tool:\n{self.err}") @@ -44,7 +42,9 @@ def try_register_tool(module, tool_class): ToolClass = getattr(toolmod, tool_class) TOOL_REGISTRY.append(ToolClass()) except Exception as err: - G_LOGGER.internal_error(f"Could not load command-line tool: {tool_class.lower()}.\nNote: Error was: {err}") + G_LOGGER.internal_error( + f"Could not load command-line tool: {tool_class.lower()}.\nNote: Error was: {err}" + ) TOOL_REGISTRY.append(MissingTool(tool_class.lower(), err=err)) @@ -62,4 +62,6 @@ def try_register_tool(module, tool_class): tool_names = [tool.name for tool in TOOL_REGISTRY] duplicates = {name for name in tool_names if tool_names.count(name) > 1} if duplicates: - G_LOGGER.internal_error(f"Multiple tools have the same name. Duplicate tool names found: {duplicates}") + G_LOGGER.internal_error( + f"Multiple tools have the same name. 
Duplicate tool names found: {duplicates}" + ) diff --git a/tools/Polygraphy/polygraphy/tools/run/run.py b/tools/Polygraphy/polygraphy/tools/run/run.py index 2595d90e..9cb32d6a 100644 --- a/tools/Polygraphy/polygraphy/tools/run/run.py +++ b/tools/Polygraphy/polygraphy/tools/run/run.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -87,7 +87,9 @@ def join_list(lst): summary += join_list(runners) + "." if load_results: - summary += f"\nIt will check against outputs stored in {join_list(load_results)}\n" + summary += ( + f"\nIt will check against outputs stored in {join_list(load_results)}\n" + ) return summary @@ -163,7 +165,9 @@ def get_subscriptions_impl(self): get_arg_groups_func = plugin.load() plugin_arg_groups = get_arg_groups_func() except Exception as err: - G_LOGGER.warning(f"Failed to load plugin: {plugin.name}.\nNote: Error was:\n{err}") + G_LOGGER.warning( + f"Failed to load plugin: {plugin.name}.\nNote: Error was:\n{err}" + ) else: deps.extend(plugin_arg_groups) self.loaded_plugins.append(plugin.name) @@ -187,7 +191,10 @@ def show_start_end_logging_impl(self, args): def run_impl(self, args): G_LOGGER.verbose(f"Loaded extension modules: {self.loaded_plugins}") - if self.arg_groups[ModelArgs].path is None and self.arg_groups[RunnerSelectArgs].runners: + if ( + self.arg_groups[ModelArgs].path is None + and self.arg_groups[RunnerSelectArgs].runners + ): G_LOGGER.critical( "One or more runners was specified, but no model file was provided. 
Make sure you've specified the model path, " "and also that it's not being consumed as an argument for another parameter" @@ -206,7 +213,9 @@ def run_impl(self, args): self.arg_groups[RunnerSelectArgs].add_to_script(script) RESULTS_VAR_NAME = self.arg_groups[ComparatorRunArgs].add_to_script(script) - SUCCESS_VAR_NAME = self.arg_groups[ComparatorCompareArgs].add_to_script(script, results_name=RESULTS_VAR_NAME) + SUCCESS_VAR_NAME = self.arg_groups[ComparatorCompareArgs].add_to_script( + script, results_name=RESULTS_VAR_NAME + ) script.add_import(imports=["PolygraphyException"], frm="polygraphy.exception") exit_status = safe( diff --git a/tools/Polygraphy/polygraphy/tools/script.py b/tools/Polygraphy/polygraphy/tools/script.py index 80ea1004..5dde1114 100644 --- a/tools/Polygraphy/polygraphy/tools/script.py +++ b/tools/Polygraphy/polygraphy/tools/script.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -68,9 +68,13 @@ def ensure_safe(inp): Ensures that the input is marked as a safe string (i.e. Script.String(safe=True)). """ if not isinstance(inp, Script.String): - G_LOGGER.internal_error(f"Input to ensure_safe must be of type Script.String, but was: {inp}") + G_LOGGER.internal_error( + f"Input to ensure_safe must be of type Script.String, but was: {inp}" + ) elif not inp.safe: - G_LOGGER.internal_error(f"Input string: {inp} was not checked for safety. This is a potential security risk!") + G_LOGGER.internal_error( + f"Input string: {inp} was not checked for safety. This is a potential security risk!" 
+ ) return inp @@ -117,8 +121,14 @@ def make_invocable_impl(type_str, *args, **kwargs): """ # We don't need to check obj_str for safety since we know that any inline # args/kwargs are already safe - other types need no checks - obj_str, all_args_default, all_kwargs_default = util.make_repr(type_str, *args, **kwargs) - return Script.String(obj_str, safe=True, inline=True), all_args_default, all_kwargs_default + obj_str, all_args_default, all_kwargs_default = util.make_repr( + type_str, *args, **kwargs + ) + return ( + Script.String(obj_str, safe=True, inline=True), + all_args_default, + all_kwargs_default, + ) @mod.export() @@ -162,7 +172,9 @@ def make_invocable_if_nondefault(type_str, *args, **kwargs): >>> make_invocable_if_nondefault("my_func", None, None, last=None) None """ - obj_str, all_args_default, all_kwargs_default = make_invocable_impl(type_str, *args, **kwargs) + obj_str, all_args_default, all_kwargs_default = make_invocable_impl( + type_str, *args, **kwargs + ) if all_args_default and all_kwargs_default: return None return obj_str @@ -221,9 +233,13 @@ def __repr__(self): def __iadd__(self, other): if not isinstance(other, Script.String): - G_LOGGER.internal_error(f"Cannot concatenate str and Script.String. Note: str was: {other}") + G_LOGGER.internal_error( + f"Cannot concatenate str and Script.String. Note: str was: {other}" + ) elif self.safe != other.safe: - G_LOGGER.internal_error(f"Cannot concatenate unsafe string ({other}) to safe string ({self.s})!") + G_LOGGER.internal_error( + f"Cannot concatenate unsafe string ({other}) to safe string ({self.s})!" + ) self.s += other.s return self @@ -258,8 +274,12 @@ def __init__(self, summary=None, always_create_runners=True): Whether to create the list of runners even if it would be empty. """ self.imports = {} # Dict[str, Set[str, str]]: Maps from: {(import, as), ...} - self.loaders = OrderedDict() # Dict[str, str] Maps a string constructing a loader to a name. 
- self.loader_count = defaultdict(int) # Dict[str, int] Maps loader_id to the number of loaders sharing that ID + self.loaders = ( + OrderedDict() + ) # Dict[str, str] Maps a string constructing a loader to a name. + self.loader_count = defaultdict( + int + ) # Dict[str, int] Maps loader_id to the number of loaders sharing that ID self.runners = [] # List[str] self.preimport = [] # List[str] self.suffix = [] # List[str] @@ -287,7 +307,9 @@ def add_import(self, imports, frm=None, imp_as=None): imports = {imports} if imp_as and len(imports) > 1: - G_LOGGER.internal_error("When `imp_as` is specified, `imports` must be a string and not a list") + G_LOGGER.internal_error( + "When `imp_as` is specified, `imports` must be a string and not a list" + ) if frm not in self.imports: self.imports[frm] = set() @@ -383,7 +405,11 @@ def __str__(self): script += f"# Generation Command: {' '.join(sys.argv)}\n" if self.summary: script += "# " + "\n# ".join(self.summary.splitlines()) + "\n" - script += ("\n" if self.preimport else "") + "\n".join(self.preimport) + ("\n\n" if self.preimport else "") + script += ( + ("\n" if self.preimport else "") + + "\n".join(self.preimport) + + ("\n\n" if self.preimport else "") + ) has_external_import = False imports = [] @@ -392,7 +418,10 @@ def __str__(self): is_external_import = False if frm is not None: # NOTE: We do not currently translate 'from' imports to `lazy_import`. - imps = [f"{imp}" if imp_as is None else f"{imp} as {imp_as}" for imp, imp_as in imps] + imps = [ + f"{imp}" if imp_as is None else f"{imp} as {imp_as}" + for imp, imp_as in imps + ] imports.append(f"from {frm} import {', '.join(imps)}") else: # When `frm` is None, we want to treat each import separately. 
@@ -403,7 +432,9 @@ def __str__(self): imp_as = imp_as or imp imports.append(f"{imp_as} = mod.lazy_import({repr(imp)})") else: - imports.append(f"import {imp}{'' if imp_as is None else f' as {imp_as}'}") + imports.append( + f"import {imp}{'' if imp_as is None else f' as {imp_as}'}" + ) has_external_import |= is_external_import if has_external_import: diff --git a/tools/Polygraphy/polygraphy/tools/sparse.py b/tools/Polygraphy/polygraphy/tools/sparse.py index 2cdbbfff..92a64003 100644 --- a/tools/Polygraphy/polygraphy/tools/sparse.py +++ b/tools/Polygraphy/polygraphy/tools/sparse.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -75,15 +75,21 @@ def __tensor(self, t, axis): if t in self.tname2producer: producer = self.tname2producer[t] if producer.op_type in axis_insensitive_op_type: - G_LOGGER.ultra_verbose(f"({t}) is produced by {producer.op_type}, looking back") + G_LOGGER.ultra_verbose( + f"({t}) is produced by {producer.op_type}, looking back" + ) self.__tensor(producer.input[0], axis) elif producer.op_type == "Transpose": - G_LOGGER.ultra_verbose(f"({t}) is produced by {producer.op_type}, checking attributes") + G_LOGGER.ultra_verbose( + f"({t}) is produced by {producer.op_type}, checking attributes" + ) for attr in producer.attribute: if attr.name == "perm": perm = list(attr.ints) new_axis = perm.index(axis) - G_LOGGER.ultra_verbose(f"attribute is {perm}, axis {axis} -> {new_axis}") + G_LOGGER.ultra_verbose( + f"attribute is {perm}, axis {axis} -> {new_axis}" + ) self.__tensor(producer.input[0], new_axis) return G_LOGGER.warning(f"{producer.op_type} doesn't have attribute!") @@ -92,7 +98,9 @@ def __tensor(self, t, axis): f"({t}) produced by {producer.name} type 
{producer.op_type}. Stopping backward analysis." ) else: - G_LOGGER.warning(f"({t}) produced by {producer.name} type: {producer.op_type} is unsupported!") + G_LOGGER.warning( + f"({t}) produced by {producer.name} type: {producer.op_type} is unsupported!" + ) def __conv(self, node): assert node.op_type == "Conv" @@ -133,7 +141,9 @@ def _walk_nodes(self): count = len(self.g.node) for i in range(count): n = self.g.node[i] - G_LOGGER.super_verbose(f"Processing node {i}/{count} ({n.op_type}): {n.name}") + G_LOGGER.super_verbose( + f"Processing node {i}/{count} ({n.op_type}): {n.name}" + ) if n.op_type == "MatMul": self.__matmul(n) elif n.op_type == "Gemm": @@ -239,7 +249,10 @@ def short2long(idx): zeros = 0 if is_raw_data: for i in range(step): - if data[short2long(i) * 2] == 0 and data[short2long(i) * 2 + 1] == 0: + if ( + data[short2long(i) * 2] == 0 + and data[short2long(i) * 2 + 1] == 0 + ): zeros += 1 else: i32_data_0 = data[short2long(0)] @@ -251,7 +264,9 @@ def bf16_zeros_in_int32(v): v1_zero = 1 if bf16_data_1 == 0 else 0 return v0_zero + v1_zero - zeros = bf16_zeros_in_int32(i32_data_0) + bf16_zeros_in_int32(i32_data_0) + zeros = bf16_zeros_in_int32(i32_data_0) + bf16_zeros_in_int32( + i32_data_0 + ) if zeros < 2: G_LOGGER.warning(f"Found non-sparse tensor: {tensor.name}") return False @@ -289,7 +304,9 @@ def process_tensor(pinfo, tensor, check): outer *= dims[i] for i in range(axis + 1, len(tensor.dims), 1): pstride *= dims[i] - G_LOGGER.ultra_verbose(f"axis {axis} of dims {dims} has stride {pstride} and outer {outer}") + G_LOGGER.ultra_verbose( + f"axis {axis} of dims {dims} has stride {pstride} and outer {outer}" + ) # We need hacks since BF16 has not been fully enabled in Numpy or ONNX. 
if tensor.data_type is onnx.TensorProto.BFLOAT16: diff --git a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/base.py b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/base.py index 5a8991da..d60643cf 100644 --- a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/base.py +++ b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/base.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/extract.py b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/extract.py index fac44dd4..49259be9 100644 --- a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/extract.py +++ b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/extract.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,7 +19,13 @@ from polygraphy import mod from polygraphy.common import TensorMetadata from polygraphy.logger import G_LOGGER -from polygraphy.tools.args import DataLoaderArgs, ModelArgs, OnnxInferShapesArgs, OnnxLoadArgs, OnnxSaveArgs +from polygraphy.tools.args import ( + DataLoaderArgs, + ModelArgs, + OnnxInferShapesArgs, + OnnxLoadArgs, + OnnxSaveArgs, +) from polygraphy.tools.args import util as args_util from polygraphy.tools.surgeon.subtool.base import BaseSurgeonSubtool from polygraphy.datatype import DataType @@ -93,7 +99,9 @@ def missing_meta_tensors(input_metadata, output_metadata): model = super().load_model() user_input_metadata = args_util.parse_meta(args.input_meta) - user_output_metadata = args_util.parse_meta(args.output_meta, includes_shape=False) + user_output_metadata = args_util.parse_meta( + args.output_meta, includes_shape=False + ) # Loads an ONNX-GS graph and create new I/O metadata w/ info missing in user_input/output_metadata. def load_graph_and_io_meta(model): @@ -128,7 +136,10 @@ def make_io_meta(user_meta, tensors): self.arg_groups[OnnxInferShapesArgs].force_fallback or self.arg_groups[OnnxInferShapesArgs].do_shape_inference ) - if missing_meta_tensors(input_metadata, output_metadata) and not skip_shape_inference: + if ( + missing_meta_tensors(input_metadata, output_metadata) + and not skip_shape_inference + ): G_LOGGER.info( "Running shape inference to derive shapes and/or data types for `auto` arguments.\n" "To avoid this, you can specify the shapes and data types explicitly." @@ -147,10 +158,13 @@ def make_io_meta(user_meta, tensors): "\nTo avoid this, please provide metadata on the command-line. " ) else: - G_LOGGER.info("Forcing fallback shape inference. This will cause dynamic dimensions to become static.") + G_LOGGER.info( + "Forcing fallback shape inference. This will cause dynamic dimensions to become static." 
+ ) _, layerwise_meta = self.arg_groups[OnnxInferShapesArgs].fallback_inference( - model, outputs=list(input_metadata.keys()) + list(output_metadata.keys()) + model, + outputs=list(input_metadata.keys()) + list(output_metadata.keys()), ) def update_meta_from_layerwise(meta, user_meta, set_shapes=True): @@ -164,18 +178,31 @@ def choose_meta(user, model, fallback): user_dtype, user_shape = None, None if name in user_meta: - user_dtype, user_shape = user_meta[name].dtype, user_meta[name].shape + user_dtype, user_shape = ( + user_meta[name].dtype, + user_meta[name].shape, + ) - meta[name].dtype = choose_meta(user_dtype, meta[name].dtype, layerwise_meta[name].dtype) + meta[name].dtype = choose_meta( + user_dtype, meta[name].dtype, layerwise_meta[name].dtype + ) if set_shapes: - meta[name].shape = choose_meta(user_shape, meta[name].shape, layerwise_meta[name].shape) - G_LOGGER.verbose(f"Updated tensor: {name} metadata to: {meta[name]}") + meta[name].shape = choose_meta( + user_shape, meta[name].shape, layerwise_meta[name].shape + ) + G_LOGGER.verbose( + f"Updated tensor: {name} metadata to: {meta[name]}" + ) return meta - input_metadata = update_meta_from_layerwise(input_metadata, user_input_metadata) + input_metadata = update_meta_from_layerwise( + input_metadata, user_input_metadata + ) output_metadata = update_meta_from_layerwise( - output_metadata, user_output_metadata, set_shapes=self.arg_groups[OnnxInferShapesArgs].force_fallback + output_metadata, + user_output_metadata, + set_shapes=self.arg_groups[OnnxInferShapesArgs].force_fallback, ) graph = onnx_backend.extract_subgraph(graph, input_metadata, output_metadata) diff --git a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/insert.py b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/insert.py index 3161c883..94747672 100644 --- a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/insert.py +++ b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/insert.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright 
(c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,7 +16,12 @@ # from polygraphy import mod from polygraphy.logger import G_LOGGER -from polygraphy.tools.args import ModelArgs, OnnxInferShapesArgs, OnnxLoadArgs, OnnxSaveArgs +from polygraphy.tools.args import ( + ModelArgs, + OnnxInferShapesArgs, + OnnxLoadArgs, + OnnxSaveArgs, +) from polygraphy.tools.args import util as args_util from polygraphy.tools.args.base import BaseArgs from polygraphy.tools.surgeon.subtool.base import BaseSurgeonSubtool @@ -46,8 +51,12 @@ def add_parser_args_impl(self): nargs="+", required=True, ) - self.group.add_argument("--op", help="The ONNX op to use for the new node", required=True) - self.group.add_argument("--name", help="The name to use for the new node", default=None) + self.group.add_argument( + "--op", help="The ONNX op to use for the new node", required=True + ) + self.group.add_argument( + "--name", help="The name to use for the new node", default=None + ) self.group.add_argument( "--attrs", help="Attributes to set in the new node. 
" @@ -64,7 +73,9 @@ def parse_impl(self, args): self.op = args_util.get(args, "op") self.name = args_util.get(args, "name") - self.attrs = args_util.parse_arglist_to_dict(args_util.get(args, "attrs"), sep="=") + self.attrs = args_util.parse_arglist_to_dict( + args_util.get(args, "attrs"), sep="=" + ) self.inputs = args_util.get(args, "inputs") self.outputs = args_util.get(args, "outputs") @@ -81,7 +92,11 @@ def __init__(self): def get_subscriptions_impl(self): return [ OnnxNodeArgs(), - ModelArgs(model_opt_required=True, input_shapes_opt_name=False, required_model_type="onnx"), + ModelArgs( + model_opt_required=True, + input_shapes_opt_name=False, + required_model_type="onnx", + ), OnnxInferShapesArgs(), OnnxLoadArgs(outputs_opt_prefix=False), OnnxSaveArgs(allow_shape_inference=True, output_opt_required=True), @@ -129,7 +144,9 @@ def replace_tensor(tensors): tensor.inputs.clear() output_tensors.append(tensor) - input_tensors = [get_tensor(name) for name in self.arg_groups[OnnxNodeArgs].inputs] + input_tensors = [ + get_tensor(name) for name in self.arg_groups[OnnxNodeArgs].inputs + ] new_node = gs.Node( op=self.arg_groups[OnnxNodeArgs].op, @@ -144,7 +161,9 @@ def replace_tensor(tensors): # after its last input node to maintain the sorting. with graph.node_ids(): # Nodes with no inputs can be inserted at index 0 - insert_index = max([node.id + 1 for inp in input_tensors for node in inp.inputs] + [0]) + insert_index = max( + [node.id + 1 for inp in input_tensors for node in inp.inputs] + [0] + ) graph.nodes.insert(insert_index, new_node) diff --git a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/prune.py b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/prune.py index c48d0ff2..6b5d32de 100644 --- a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/prune.py +++ b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/prune.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -36,8 +36,16 @@ def show_start_end_logging_impl(self, args): def get_subscriptions_impl(self): return [ - ModelArgs(model_opt_required=True, input_shapes_opt_name=False, required_model_type="onnx"), - OnnxLoadArgs(allow_shape_inference=False, outputs_opt_prefix=False, allow_from_tf=False), + ModelArgs( + model_opt_required=True, + input_shapes_opt_name=False, + required_model_type="onnx", + ), + OnnxLoadArgs( + allow_shape_inference=False, + outputs_opt_prefix=False, + allow_from_tf=False, + ), OnnxSaveArgs(allow_shape_inference=False, output_opt_required=True), ] diff --git a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/sanitize.py b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/sanitize.py index a185542b..a8b141c2 100644 --- a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/sanitize.py +++ b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/sanitize.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,7 +17,13 @@ from polygraphy import mod, util from polygraphy.logger import G_LOGGER from polygraphy.tools import util as tools_util -from polygraphy.tools.args import DataLoaderArgs, ModelArgs, OnnxInferShapesArgs, OnnxLoadArgs, OnnxSaveArgs +from polygraphy.tools.args import ( + DataLoaderArgs, + ModelArgs, + OnnxInferShapesArgs, + OnnxLoadArgs, + OnnxSaveArgs, +) from polygraphy.tools.args import util as args_util from polygraphy.tools.args.base import BaseArgs from polygraphy.tools.script import make_invocable @@ -108,7 +114,9 @@ def parse_impl(self, args): self.partitioning = args_util.get(args, "partitioning") self.fold_shapes = args_util.get(args, "fold_shapes") self.per_pass_shape_inference = args_util.get(args, "per_pass_shape_inference") - self.size_threshold = args_util.parse_num_bytes(args_util.get(args, "fold_size_threshold")) + self.size_threshold = args_util.parse_num_bytes( + args_util.get(args, "fold_size_threshold") + ) if not self.fold_constants: for arg in [ @@ -135,13 +143,18 @@ def add_to_script_impl(self, script, loader_name): "FoldConstants", loader_name, num_passes=self.num_passes, - do_shape_inference=self.arg_groups[OnnxInferShapesArgs].do_shape_inference - if self.per_pass_shape_inference is not False # since `None` indicates default value - else False, + do_shape_inference=( + self.arg_groups[OnnxInferShapesArgs].do_shape_inference + if self.per_pass_shape_inference + is not False # since `None` indicates default value + else False + ), fold_shapes=self.fold_shapes, partitioning=self.partitioning, size_threshold=self.size_threshold, - allow_onnxruntime_shape_inference=self.arg_groups[OnnxInferShapesArgs].allow_onnxruntime, + allow_onnxruntime_shape_inference=self.arg_groups[ + OnnxInferShapesArgs + ].allow_onnxruntime, ), "fold_constants", ) @@ -214,7 +227,9 @@ def get_graph(): rerun_shape_inference = True if 
self.arg_groups[OnnxInferShapesArgs].force_fallback: - _, layerwise_meta = self.arg_groups[OnnxInferShapesArgs].fallback_inference(model) + _, layerwise_meta = self.arg_groups[ + OnnxInferShapesArgs + ].fallback_inference(model) graph = get_graph() onnx_util.set_shapes_from_layerwise_meta(graph, layerwise_meta) diff --git a/tools/Polygraphy/polygraphy/tools/surgeon/surgeon.py b/tools/Polygraphy/polygraphy/tools/surgeon/surgeon.py index 0acbc94c..02bf0615 100644 --- a/tools/Polygraphy/polygraphy/tools/surgeon/surgeon.py +++ b/tools/Polygraphy/polygraphy/tools/surgeon/surgeon.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,7 +15,14 @@ # limitations under the License. # from polygraphy.tools.base import Tool -from polygraphy.tools.surgeon.subtool import Extract, Insert, Sanitize, Prune, WeightStripper, WeightReconstructor +from polygraphy.tools.surgeon.subtool import ( + Extract, + Insert, + Sanitize, + Prune, + WeightStripper, + WeightReconstructor, +) ################################# MAIN TOOL ################################# diff --git a/tools/Polygraphy/polygraphy/tools/template/subtool/base.py b/tools/Polygraphy/polygraphy/tools/template/subtool/base.py index cf6cd802..8aa39c25 100644 --- a/tools/Polygraphy/polygraphy/tools/template/subtool/base.py +++ b/tools/Polygraphy/polygraphy/tools/template/subtool/base.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/tools/template/subtool/onnx_gs.py b/tools/Polygraphy/polygraphy/tools/template/subtool/onnx_gs.py index 023b0f80..64f0981a 100644 --- a/tools/Polygraphy/polygraphy/tools/template/subtool/onnx_gs.py +++ b/tools/Polygraphy/polygraphy/tools/template/subtool/onnx_gs.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -44,9 +44,13 @@ def run_impl(self, args): script.add_import(imports="GsFromOnnx", frm="polygraphy.backend.onnx") loader_name = self.arg_groups[OnnxLoadArgs].add_to_script(script) - loader_name = script.add_loader(make_invocable("GsFromOnnx", loader_name), "load_gs") + loader_name = script.add_loader( + make_invocable("GsFromOnnx", loader_name), "load_gs" + ) - new_model_path = util.add_file_suffix(self.arg_groups[ModelArgs].path, "_updated") + new_model_path = util.add_file_suffix( + self.arg_groups[ModelArgs].path, "_updated" + ) content = safe( dedent( diff --git a/tools/Polygraphy/polygraphy/tools/template/subtool/trt_config.py b/tools/Polygraphy/polygraphy/tools/template/subtool/trt_config.py index f2a16da7..24e3131d 100644 --- a/tools/Polygraphy/polygraphy/tools/template/subtool/trt_config.py +++ b/tools/Polygraphy/polygraphy/tools/template/subtool/trt_config.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -37,7 +37,10 @@ def get_subscriptions_impl(self): ] def run_impl(self, args): - script = Script(summary="Creates a TensorRT Builder Configuration.", always_create_runners=False) + script = Script( + summary="Creates a TensorRT Builder Configuration.", + always_create_runners=False, + ) script.add_import(imports=["func"], frm="polygraphy") script.add_import(imports="tensorrt", imp_as="trt") diff --git a/tools/Polygraphy/polygraphy/tools/template/subtool/trt_network.py b/tools/Polygraphy/polygraphy/tools/template/subtool/trt_network.py index 8796fb39..fe31a869 100644 --- a/tools/Polygraphy/polygraphy/tools/template/subtool/trt_network.py +++ b/tools/Polygraphy/polygraphy/tools/template/subtool/trt_network.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -49,7 +49,10 @@ def get_subscriptions_impl(self): ] def run_impl(self, args): - script = Script(summary="Creates a TensorRT Network using the Network API.", always_create_runners=False) + script = Script( + summary="Creates a TensorRT Network using the Network API.", + always_create_runners=False, + ) script.add_import(imports=["func"], frm="polygraphy") script.add_import(imports="tensorrt", imp_as="trt") @@ -64,7 +67,9 @@ def run_impl(self, args): script.append_suffix(safe("@func.extend({:})", inline(loader_name))) script.append_suffix(safe("def load_network({:}):", inline(params))) script.append_suffix( - safe(f"{constants.TAB}pass # TODO: Set up the network here. This function should not return anything.") + safe( + f"{constants.TAB}pass # TODO: Set up the network here. This function should not return anything." 
+ ) ) script.save(args.output) diff --git a/tools/Polygraphy/polygraphy/tools/template/template.py b/tools/Polygraphy/polygraphy/tools/template/template.py index 4fab5973..5c899493 100644 --- a/tools/Polygraphy/polygraphy/tools/template/template.py +++ b/tools/Polygraphy/polygraphy/tools/template/template.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/polygraphy/tools/util.py b/tools/Polygraphy/polygraphy/tools/util.py index 9ead6c29..7ac5a27a 100644 --- a/tools/Polygraphy/polygraphy/tools/util.py +++ b/tools/Polygraphy/polygraphy/tools/util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -37,7 +37,9 @@ def override_input_shapes(graph, user_input_metadata): input_metadata.update(user_input_metadata) graph = onnx_backend.extract_subgraph(graph, input_metadata) - G_LOGGER.info(f"Overriding input shapes to:\n{onnx_util.meta_from_gs_tensors(graph.inputs)}") + G_LOGGER.info( + f"Overriding input shapes to:\n{onnx_util.meta_from_gs_tensors(graph.inputs)}" + ) # Have to unset intermediate shapes as they may cause problems. tensors = graph.tensors() diff --git a/tools/Polygraphy/polygraphy/util/array.py b/tools/Polygraphy/polygraphy/util/array.py index 4381ccf6..57231ff8 100644 --- a/tools/Polygraphy/polygraphy/util/array.py +++ b/tools/Polygraphy/polygraphy/util/array.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -528,9 +528,9 @@ def numpy_impl(obj, shape): return { "numpy": numpy_impl, "torch": lambda obj, shape: obj.resize_(shape) if shape != obj.shape else obj, - "device_view": lambda obj, shape: obj.resize(shape) - if shape != obj.shape - else obj, + "device_view": lambda obj, shape: ( + obj.resize(shape) if shape != obj.shape else obj + ), } diff --git a/tools/Polygraphy/polygraphy/util/util.py b/tools/Polygraphy/polygraphy/util/util.py index 7a28357c..ae1c74e0 100644 --- a/tools/Polygraphy/polygraphy/util/util.py +++ b/tools/Polygraphy/polygraphy/util/util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -73,7 +73,13 @@ def find_str_in_iterable(name, seq, index=None): @mod.export() def check_sequence_contains( - sequence, items, name=None, items_name=None, log_func=None, check_missing=None, check_extra=None + sequence, + items, + name=None, + items_name=None, + log_func=None, + check_missing=None, + check_extra=None, ): """ Checks that a sequence contains the provided items and also @@ -248,7 +254,10 @@ def default(value, default): @mod.export() def is_sequence(obj): return ( - hasattr(obj, "__iter__") and not isinstance(obj, dict) and not isinstance(obj, set) and not isinstance(obj, str) + hasattr(obj, "__iter__") + and not isinstance(obj, dict) + and not isinstance(obj, set) + and not isinstance(obj, str) ) @@ -294,7 +303,9 @@ def __init__(self, mode=None, prefix=None, suffix=None): suffix = default(suffix, "") def rand_path(): - return os.path.join(tempfile.gettempdir(), f"{prefix}{os.urandom(24).hex()}{suffix}") + return os.path.join( + tempfile.gettempdir(), f"{prefix}{os.urandom(24).hex()}{suffix}" + ) # In the unlikely event the path exists, generate a new one. Only try 100 times so # we don't end up in an infinite loop. 
@@ -304,7 +315,9 @@ def rand_path(): break path = rand_path() else: - G_LOGGER.critical(f"Could not create a temporary file under: {tempfile.gettempdir()}") + G_LOGGER.critical( + f"Could not create a temporary file under: {tempfile.gettempdir()}" + ) self.name = path # Use 'name' to be compatible with tempfile.NamedTemporaryFile open(self.name, "x").close() # `touch` the file @@ -363,7 +376,11 @@ def __enter__(self): locked = False while not locked: try: - msvcrt.locking(self._fhandle.fileno(), msvcrt.LK_RLCK, get_file_size(self._fhandle)) + msvcrt.locking( + self._fhandle.fileno(), + msvcrt.LK_RLCK, + get_file_size(self._fhandle), + ) except OSError: locked = False else: @@ -379,7 +396,9 @@ def __exit__(self, exc_type, exc_val, exc_tb): return if sys.platform.startswith("win"): - msvcrt.locking(self._fhandle.fileno(), msvcrt.LK_UNLCK, get_file_size(self._fhandle)) + msvcrt.locking( + self._fhandle.fileno(), msvcrt.LK_UNLCK, get_file_size(self._fhandle) + ) else: fcntl.lockf(self._fhandle.fileno(), fcntl.LOCK_UN) @@ -634,7 +653,9 @@ def try_send_on_queue(queue, obj): try: send_on_queue(queue, obj) except Exception as err: - G_LOGGER.warning(f"Could not send object on queue: {err}\nSending None instead.") + G_LOGGER.warning( + f"Could not send object on queue: {err}\nSending None instead." 
+ ) queue.put(None) @@ -697,7 +718,9 @@ def wrapped(*args, **kwargs): # Skip checks if we're calling these functions internally module = inspect.getmodule(sys._getframe(1)) called_from_polygraphy = ( - module is not None and module.__name__ and module.__name__.split(".")[0] == "polygraphy" + module is not None + and module.__name__ + and module.__name__.split(".")[0] == "polygraphy" ) if not called_from_polygraphy: @@ -736,14 +759,21 @@ def is_shape_dynamic(shape): @mod.export() def is_valid_shape_override(new_shape, original_shape): ranks_same = len(original_shape) == len(new_shape) - overrides_valid = all([odim == ndim or is_dimension_dynamic(odim) for odim, ndim in zip(original_shape, new_shape)]) + overrides_valid = all( + [ + odim == ndim or is_dimension_dynamic(odim) + for odim, ndim in zip(original_shape, new_shape) + ] + ) return ranks_same and overrides_valid @mod.export() def override_dynamic_shape(shape, default_shape_value=None): default_shape_value = default(default_shape_value, constants.DEFAULT_SHAPE_VALUE) - return [default_shape_value if is_dimension_dynamic(elem) else elem for elem in shape] + return [ + default_shape_value if is_dimension_dynamic(elem) else elem for elem in shape + ] @mod.export() @@ -788,19 +818,25 @@ def try_reshape(arr, shape): ) else: if array_util.shape(arr) != original_shape: - G_LOGGER.info(f"Reshaped array from shape: {original_shape} to: {array_util.shape(arr)}") + G_LOGGER.info( + f"Reshaped array from shape: {original_shape} to: {array_util.shape(arr)}" + ) return arr def try_permute(arr, shape): original_shape = array_util.shape(arr) if sorted(array_util.shape(arr)) != sorted(shape): - G_LOGGER.extra_verbose(f"Array of shape: {array_util.shape(arr)} cannot be permuted to: {shape}") + G_LOGGER.extra_verbose( + f"Array of shape: {array_util.shape(arr)} cannot be permuted to: {shape}" + ) return arr # We need to remove axes from the original shape as we use them to avoid # duplication in the permutation. 
- arr_shape_indices = {index: dimlen for index, dimlen in enumerate(array_util.shape(arr))} + arr_shape_indices = { + index: dimlen for index, dimlen in enumerate(array_util.shape(arr)) + } # Find which axis in array_util.shape(arr) corresponds to the specified size. Never returns duplicates. def find_axis(dimlen): @@ -830,7 +866,9 @@ def try_freeze_shape(arr, shape): determined_dim = volume(array_util.shape(arr)) // volume(static_dims) except ZeroDivisionError: determined_dim = 0 - shape = [determined_dim if is_dimension_dynamic(elem) else elem for elem in shape] + shape = [ + determined_dim if is_dimension_dynamic(elem) else elem for elem in shape + ] elif is_rank_same(arr, shape): shape = [ arr_shape_elem if is_dimension_dynamic(elem) else elem @@ -860,7 +898,9 @@ def try_freeze_shape(arr, shape): @mod.export() -def str_from_layer(prefix, index, name, op, input_names, input_meta, output_names, output_meta): +def str_from_layer( + prefix, index, name, op, input_names, input_meta, output_names, output_meta +): def tensor_names_to_string(tensor_names, meta): sep = ",\n " elems = [f"{name} {meta[name]}".strip() for name in tensor_names] diff --git a/tools/Polygraphy/setup.py b/tools/Polygraphy/setup.py index b0e14497..d3e7050c 100644 --- a/tools/Polygraphy/setup.py +++ b/tools/Polygraphy/setup.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/backend/base/test_loader.py b/tools/Polygraphy/tests/backend/base/test_loader.py index f836bcae..e32c4e08 100644 --- a/tools/Polygraphy/tests/backend/base/test_loader.py +++ b/tools/Polygraphy/tests/backend/base/test_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/backend/base/test_runner.py b/tools/Polygraphy/tests/backend/base/test_runner.py index f61b0710..1153de27 100644 --- a/tools/Polygraphy/tests/backend/base/test_runner.py +++ b/tools/Polygraphy/tests/backend/base/test_runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/backend/common/test_loader.py b/tools/Polygraphy/tests/backend/common/test_loader.py index 0aaba8c9..6c97df45 100644 --- a/tools/Polygraphy/tests/backend/common/test_loader.py +++ b/tools/Polygraphy/tests/backend/common/test_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -71,5 +71,7 @@ def example(): f.flush() os.fsync(f.fileno()) - with pytest.raises(PolygraphyException, match="Could not import symbol: non_existent from"): + with pytest.raises( + PolygraphyException, match="Could not import symbol: non_existent from" + ): invoke_from_script(f.name, "non_existent") diff --git a/tools/Polygraphy/tests/backend/onnx/test_loader.py b/tools/Polygraphy/tests/backend/onnx/test_loader.py index 50d8fede..b178a90d 100644 --- a/tools/Polygraphy/tests/backend/onnx/test_loader.py +++ b/tools/Polygraphy/tests/backend/onnx/test_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -59,7 +59,9 @@ def test_basic(self): @pytest.mark.serial def test_warn_if_impl_methods_called(self, check_warnings_on_loader_impl_methods): - check_warnings_on_loader_impl_methods(OnnxFromPath(ONNX_MODELS["identity"].path)) + check_warnings_on_loader_impl_methods( + OnnxFromPath(ONNX_MODELS["identity"].path) + ) def test_external_data(self): model = ONNX_MODELS["ext_weights"] @@ -113,7 +115,9 @@ def test_layerwise(self, copy): @pytest.mark.parametrize("output", ["identity_out_0", "identity_out_2"]) def test_custom_outputs(self, output): - loader = ModifyOutputs(OnnxFromPath(ONNX_MODELS["identity_identity"].path), outputs=[output]) + loader = ModifyOutputs( + OnnxFromPath(ONNX_MODELS["identity_identity"].path), outputs=[output] + ) model = loader() assert len(model.graph.output) == 1 assert model.graph.output[0].name == output @@ -148,7 +152,9 @@ def test_model(self, allow_onnxruntime): self.check_model(model) def test_path(self, allow_onnxruntime): - model = 
infer_shapes(ONNX_MODELS["identity_identity"].path, allow_onnxruntime=allow_onnxruntime) + model = infer_shapes( + ONNX_MODELS["identity_identity"].path, allow_onnxruntime=allow_onnxruntime + ) self.check_model(model) @pytest.mark.parametrize("set_data_dir", [True, False]) @@ -163,7 +169,9 @@ def test_external_data(self, set_data_dir, allow_onnxruntime): def test_save_to_disk_on_size_threshold(self, allow_onnxruntime): model = onnx_from_path(ONNX_MODELS["const_foldable"].path) - model = infer_shapes(model, save_to_disk_threshold_bytes=0, allow_onnxruntime=allow_onnxruntime) + model = infer_shapes( + model, save_to_disk_threshold_bytes=0, allow_onnxruntime=allow_onnxruntime + ) self.check_model(model) @@ -188,7 +196,9 @@ class TestFoldConstants: @pytest.mark.parametrize("partitioning", [None, "basic", "recursive"]) @pytest.mark.parametrize("copy", [True, False]) @pytest.mark.parametrize("allow_onnxruntime_shape_inference", [True, False]) - def test_basic(self, partitioning, fold_shapes, copy, allow_onnxruntime_shape_inference): + def test_basic( + self, partitioning, fold_shapes, copy, allow_onnxruntime_shape_inference + ): original_model = onnx_from_path(ONNX_MODELS["const_foldable"].path) loader = FoldConstants( original_model, @@ -275,7 +285,9 @@ def test_save_onnx(self): def test_external_data(self): with util.NamedTemporaryFile() as path, util.NamedTemporaryFile() as data: model = OnnxFromPath(ONNX_MODELS["const_foldable"].path) - loader = SaveOnnx(model, path.name, external_data_path=data.name, size_threshold=0) + loader = SaveOnnx( + model, path.name, external_data_path=data.name, size_threshold=0 + ) loader() assert is_file_non_empty(path.name) assert is_file_non_empty(data.name) @@ -284,8 +296,14 @@ def test_external_data(self): @pytest.fixture() def extract_model(): input_metadata = TensorMetadata().add("X", dtype=np.float32, shape=(64, 64)) - output_metadata = TensorMetadata().add("identity_out_0", dtype=np.float32, shape=None) - return 
onnx_from_path(ONNX_MODELS["identity_identity"].path), input_metadata, output_metadata + output_metadata = TensorMetadata().add( + "identity_out_0", dtype=np.float32, shape=None + ) + return ( + onnx_from_path(ONNX_MODELS["identity_identity"].path), + input_metadata, + output_metadata, + ) class TestExtractSubgraph: diff --git a/tools/Polygraphy/tests/backend/onnx/test_util.py b/tools/Polygraphy/tests/backend/onnx/test_util.py index 2f731dff..a009fd56 100644 --- a/tools/Polygraphy/tests/backend/onnx/test_util.py +++ b/tools/Polygraphy/tests/backend/onnx/test_util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,10 +15,7 @@ # limitations under the License. # -from polygraphy.backend.onnx import ( - onnx_from_path, - gs_from_onnx -) +from polygraphy.backend.onnx import onnx_from_path, gs_from_onnx from polygraphy.backend.onnx import util as onnx_util from tests.models.meta import ONNX_MODELS @@ -27,9 +24,10 @@ def test_get_num_nodes(): model = onnx_from_path(ONNX_MODELS["scan"].path) assert onnx_util.get_num_nodes(model) == 3 # Should count subgraph nodes. 
+ def test_get_unbounded_dds_tensors(): model = onnx_from_path(ONNX_MODELS["unbounded_dds"].path) graph = gs_from_onnx(model) tensors = onnx_util.get_unbounded_dds_tensors(graph) assert len(tensors) == 1 - assert tensors[0].name == 'cast_out_6' \ No newline at end of file + assert tensors[0].name == "cast_out_6" diff --git a/tools/Polygraphy/tests/backend/onnxrt/test_loader.py b/tools/Polygraphy/tests/backend/onnxrt/test_loader.py index 4efab6f3..7f397b7b 100644 --- a/tools/Polygraphy/tests/backend/onnxrt/test_loader.py +++ b/tools/Polygraphy/tests/backend/onnxrt/test_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -51,5 +51,8 @@ def test_provider_matching(self, providers, expected): def test_invalid_providers_raise_errors(self): model = ONNX_MODELS["identity"] loader = SessionFromOnnx(model.loader, providers=["cpu", "not_a_real_provider"]) - with pytest.raises(PolygraphyException, match="Could not find specified ONNX-Runtime execution provider"): + with pytest.raises( + PolygraphyException, + match="Could not find specified ONNX-Runtime execution provider", + ): loader() diff --git a/tools/Polygraphy/tests/backend/onnxrt/test_runner.py b/tools/Polygraphy/tests/backend/onnxrt/test_runner.py index b1ed7967..ffb789f4 100644 --- a/tools/Polygraphy/tests/backend/onnxrt/test_runner.py +++ b/tools/Polygraphy/tests/backend/onnxrt/test_runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -85,7 +85,12 @@ def test_error_on_wrong_name_feed_dict(self, names, err): model = ONNX_MODELS["identity"] with OnnxrtRunner(SessionFromOnnx(model.loader)) as runner: with pytest.raises(PolygraphyException, match=err): - runner.infer({name: np.ones(shape=(1, 1, 2, 2), dtype=np.float32) for name in names}) + runner.infer( + { + name: np.ones(shape=(1, 1, 2, 2), dtype=np.float32) + for name in names + } + ) def test_error_on_wrong_dtype_feed_dict(self): model = ONNX_MODELS["identity"] diff --git a/tools/Polygraphy/tests/backend/pluginref/test_runner.py b/tools/Polygraphy/tests/backend/pluginref/test_runner.py index b0a18f0a..6c868d5e 100644 --- a/tools/Polygraphy/tests/backend/pluginref/test_runner.py +++ b/tools/Polygraphy/tests/backend/pluginref/test_runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -57,8 +57,16 @@ def test_works_on_multiple_nodes(self): def test_fail_on_unsupported_node(self): model = ONNX_MODELS["and"] with PluginRefRunner(GsFromOnnx(OnnxFromPath(model.path))) as runner: - with pytest.raises(PolygraphyException, match="does not have a reference implementation registered!"): - runner.infer({"x": np.ones(shape=(3, 4), dtype=bool), "y": np.ones(shape=(3, 4), dtype=bool)}) + with pytest.raises( + PolygraphyException, + match="does not have a reference implementation registered!", + ): + runner.infer( + { + "x": np.ones(shape=(3, 4), dtype=bool), + "y": np.ones(shape=(3, 4), dtype=bool), + } + ) @pytest.mark.parametrize( "names, err", @@ -72,7 +80,12 @@ def test_error_on_wrong_name_feed_dict(self, names, err): model = ONNX_MODELS["identity"] with PluginRefRunner(GsFromOnnx(OnnxFromPath(model.path))) as runner: with pytest.raises(PolygraphyException, match=err): - runner.infer({name: np.ones(shape=(1, 1, 2, 2), dtype=np.float32) for name in names}) + runner.infer( + { + name: np.ones(shape=(1, 1, 2, 2), dtype=np.float32) + for name in names + } + ) def test_error_on_wrong_dtype_feed_dict(self): model = ONNX_MODELS["identity"] diff --git a/tools/Polygraphy/tests/backend/tf/test_loader.py b/tools/Polygraphy/tests/backend/tf/test_loader.py index f05f402b..3b342706 100644 --- a/tools/Polygraphy/tests/backend/tf/test_loader.py +++ b/tools/Polygraphy/tests/backend/tf/test_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,7 +19,12 @@ import pytest from polygraphy import constants, util -from polygraphy.backend.tf import GraphFromFrozen, ModifyGraphOutputs, SaveGraph, graph_from_frozen +from polygraphy.backend.tf import ( + GraphFromFrozen, + ModifyGraphOutputs, + SaveGraph, + graph_from_frozen, +) from polygraphy.logger import G_LOGGER from tests.helper import is_file_non_empty from tests.models.meta import TF_MODELS @@ -61,12 +66,16 @@ def test_layerwise(self): class TestSaveGraph: def test_save_pb(self): with util.NamedTemporaryFile() as outpath: - tf_loader = SaveGraph(GraphFromFrozen(TF_MODELS["identity"].path), path=outpath.name) + tf_loader = SaveGraph( + GraphFromFrozen(TF_MODELS["identity"].path), path=outpath.name + ) tf_loader() assert is_file_non_empty(outpath.name) def test_save_tensorboard(self): with tempfile.TemporaryDirectory() as outdir: - tf_loader = SaveGraph(GraphFromFrozen(TF_MODELS["identity"].path), tensorboard_dir=outdir) + tf_loader = SaveGraph( + GraphFromFrozen(TF_MODELS["identity"].path), tensorboard_dir=outdir + ) tf_loader() assert os.path.exists(tf_loader.tensorboard_dir) diff --git a/tools/Polygraphy/tests/backend/tf/test_runner.py b/tools/Polygraphy/tests/backend/tf/test_runner.py index 22ceb98f..60faf220 100644 --- a/tools/Polygraphy/tests/backend/tf/test_runner.py +++ b/tools/Polygraphy/tests/backend/tf/test_runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -49,7 +49,11 @@ def test_warn_if_impl_methods_called(self, check_warnings_on_runner_impl_methods def test_save_timeline(self): model = TF_MODELS["identity"] with util.NamedTemporaryFile() as outpath: - with TfRunner(SessionFromGraph(model.loader), allow_growth=True, save_timeline=outpath.name) as runner: + with TfRunner( + SessionFromGraph(model.loader), + allow_growth=True, + save_timeline=outpath.name, + ) as runner: model.check_runner(runner) assert is_file_non_empty(outpath.name) @@ -65,16 +69,25 @@ def test_error_on_wrong_name_feed_dict(self, names, err): model = TF_MODELS["identity"] with TfRunner(SessionFromGraph(model.loader)) as runner: with pytest.raises(PolygraphyException, match=err): - runner.infer({name: np.ones(shape=(1, 15, 25, 30), dtype=np.float32) for name in names}) + runner.infer( + { + name: np.ones(shape=(1, 15, 25, 30), dtype=np.float32) + for name in names + } + ) def test_error_on_wrong_dtype_feed_dict(self): model = TF_MODELS["identity"] with TfRunner(SessionFromGraph(model.loader)) as runner: with pytest.raises(PolygraphyException, match="unexpected dtype."): - runner.infer({"Input:0": np.ones(shape=(1, 15, 25, 30), dtype=np.int32)}) + runner.infer( + {"Input:0": np.ones(shape=(1, 15, 25, 30), dtype=np.int32)} + ) def test_error_on_wrong_shape_feed_dict(self): model = TF_MODELS["identity"] with TfRunner(SessionFromGraph(model.loader)) as runner: with pytest.raises(PolygraphyException, match="incompatible shape."): - runner.infer({"Input:0": np.ones(shape=(1, 1, 25, 30), dtype=np.float32)}) + runner.infer( + {"Input:0": np.ones(shape=(1, 1, 25, 30), dtype=np.float32)} + ) diff --git a/tools/Polygraphy/tests/backend/trt/test_algorithm_selector.py b/tools/Polygraphy/tests/backend/trt/test_algorithm_selector.py index 8e35eaa1..0e6703a0 100644 --- a/tools/Polygraphy/tests/backend/trt/test_algorithm_selector.py +++ 
b/tools/Polygraphy/tests/backend/trt/test_algorithm_selector.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/backend/trt/test_calibrator.py b/tools/Polygraphy/tests/backend/trt/test_calibrator.py index 81926156..94f514a8 100644 --- a/tools/Polygraphy/tests/backend/trt/test_calibrator.py +++ b/tools/Polygraphy/tests/backend/trt/test_calibrator.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,6 +29,7 @@ network_from_onnx_bytes, ) from polygraphy.common import TensorMetadata +from polygraphy.comparator import DataLoader from polygraphy.datatype import DataType from polygraphy.exception import PolygraphyException from tests.helper import get_file_size, is_file_non_empty @@ -288,6 +289,19 @@ def test_calibrator_checks_input_metadata(self, expected_meta, meta, should_pass assert (calibrator.get_batch(list(expected_meta.keys())) is not None) == should_pass self.check_calibrator_cleanup(calibrator) + def test_calibrator_forces_float32_data(self): + data_loader = DataLoader() + + calibrator = Calibrator(data_loader) + + meta = TensorMetadata().add("input", dtype=DataType.FLOAT16, shape=(1, 2, 3)) + calibrator.set_input_metadata(meta) + + data = data_loader[0]["input"] + # TRT requires all calibration inputs to be provided in FP32 regardless of the data type + # in the original model. 
+ assert util.array.dtype(data) == DataType.FLOAT32 + # TensorRT does not support changing input shapes during calibration @pytest.mark.xfail def test_calibrator_dynamic_shapes(self, dynamic_identity_builder_network): diff --git a/tools/Polygraphy/tests/backend/trt/test_loader.py b/tools/Polygraphy/tests/backend/trt/test_loader.py index 4ae2608a..2fa3d42b 100644 --- a/tools/Polygraphy/tests/backend/trt/test_loader.py +++ b/tools/Polygraphy/tests/backend/trt/test_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,6 +26,7 @@ EngineBytesFromNetwork, EngineFromBytes, EngineFromNetwork, + EngineFromPath, LoadPlugins, LoadRuntime, ModifyNetworkOutputs, @@ -126,9 +127,11 @@ def get_plugin_names(): loader = LoadPlugins( plugins=[ - "nvinfer_plugin.dll" - if sys.platform.startswith("win") - else "libnvinfer_plugin.so" + ( + "nvinfer_plugin.dll" + if sys.platform.startswith("win") + else "libnvinfer_plugin.so" + ) ] ) loader() @@ -144,7 +147,7 @@ def test_serialized_engine_loader_from_lambda(self, identity_engine): loader = EngineFromBytes(lambda: open(outpath.name, "rb").read()) with loader() as engine: assert isinstance(engine, trt.ICudaEngine) - + def test_serialized_engine_loader_from_buffer(self, identity_engine): with identity_engine.serialize() as buffer: loader = EngineFromBytes(buffer) @@ -158,6 +161,29 @@ def test_serialized_engine_loader_custom_runtime(self, identity_engine): assert isinstance(engine, trt.ICudaEngine) +@pytest.mark.skipif( + mod.version(trt.__version__) < mod.version("10.0"), reason="API was added in TRT 10.0" +) +class TestSerializedEngineLoaderFromDisk: + def test_serialized_engine_loader_from_lambda(self, identity_engine): + with 
util.NamedTemporaryFile() as outpath: + with open(outpath.name, "wb") as f, identity_engine.serialize() as buffer: + f.write(buffer) + + loader = EngineFromPath(lambda: outpath.name) + with loader() as engine: + assert isinstance(engine, trt.ICudaEngine) + + def test_serialized_engine_loader_custom_runtime(self, identity_engine): + with util.NamedTemporaryFile() as outpath: + with open(outpath.name, "wb") as f, identity_engine.serialize() as buffer: + f.write(buffer) + + loader = EngineFromPath(lambda: outpath.name, runtime=trt.Runtime(get_trt_logger())) + with loader() as engine: + assert isinstance(engine, trt.ICudaEngine) + + @pytest.mark.skipif( mod.version(trt.__version__) < mod.version("8.6"), reason="API was added in TRT 8.6" ) @@ -196,9 +222,16 @@ def test_loader(self): @pytest.mark.parametrize( "kwargs, flag", - [({"strongly_typed": True}, trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED)] - if mod.version(trt.__version__) >= mod.version("8.7") - else [], + ( + [ + ( + {"strongly_typed": True}, + trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED, + ) + ] + if mod.version(trt.__version__) >= mod.version("8.7") + else [] + ), ) def test_network_flags(self, kwargs, flag): builder, network, parser = network_from_onnx_bytes( @@ -214,9 +247,16 @@ def test_loader(self): @pytest.mark.parametrize( "kwargs, flag", - [({"strongly_typed": True}, trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED)] - if mod.version(trt.__version__) >= mod.version("8.7") - else [], + ( + [ + ( + {"strongly_typed": True}, + trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED, + ) + ] + if mod.version(trt.__version__) >= mod.version("8.7") + else [] + ), ) def test_network_flags(self, kwargs, flag): builder, network, parser = network_from_onnx_path( @@ -539,6 +579,7 @@ def test_onnx_like_from_network(self, model_name): NetworkFromOnnxBytes(ONNX_MODELS[model_name].loader) ) + class TestDefaultPlugins: def test_default_plugins(self): network_loader = 
NetworkFromOnnxBytes(ONNX_MODELS["roialign"].loader) diff --git a/tools/Polygraphy/tests/backend/trt/test_profile.py b/tools/Polygraphy/tests/backend/trt/test_profile.py index 2a8d5ad6..5cb66137 100644 --- a/tools/Polygraphy/tests/backend/trt/test_profile.py +++ b/tools/Polygraphy/tests/backend/trt/test_profile.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,7 +24,9 @@ @pytest.fixture(scope="session") def dynamic_identity_network(): - builder, network, parser = network_from_onnx_bytes(ONNX_MODELS["dynamic_identity"].loader) + builder, network, parser = network_from_onnx_bytes( + ONNX_MODELS["dynamic_identity"].loader + ) with builder, network, parser: yield builder, network, parser @@ -55,8 +57,18 @@ def test_fill_defaults_scalar_shape_tensor(self): # Need to add some other operations so TensorRT treats `fill_shape` as a shape tensor. 
fill = network.add_fill(tuple(), trt.FillOperation.LINSPACE) fill.set_input(0, fill_shape) - fill.set_input(1, network.add_constant(shape=tuple(), weights=np.array(0).astype(np.int32)).get_output(0)) - fill.set_input(2, network.add_constant(shape=tuple(), weights=np.array(1).astype(np.int32)).get_output(0)) + fill.set_input( + 1, + network.add_constant( + shape=tuple(), weights=np.array(0).astype(np.int32) + ).get_output(0), + ) + fill.set_input( + 2, + network.add_constant( + shape=tuple(), weights=np.array(1).astype(np.int32) + ).get_output(0), + ) network.mark_output(fill.get_output(0)) diff --git a/tools/Polygraphy/tests/backend/trt/test_runner.py b/tools/Polygraphy/tests/backend/trt/test_runner.py index 7b9f411f..601aff24 100644 --- a/tools/Polygraphy/tests/backend/trt/test_runner.py +++ b/tools/Polygraphy/tests/backend/trt/test_runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -79,7 +79,9 @@ def test_basic(self, identity_engine): assert not runner.is_active @pytest.mark.serial - def test_warn_if_impl_methods_called(self, check_warnings_on_runner_impl_methods, identity_engine): + def test_warn_if_impl_methods_called( + self, check_warnings_on_runner_impl_methods, identity_engine + ): runner = TrtRunner(identity_engine) check_warnings_on_runner_impl_methods(runner) @@ -96,23 +98,34 @@ def test_data_dependent_shapes(self, nonzero_engine, inp, expected): outputs = runner.infer( { "input": np.array( - inp, dtype=np.int32 if mod.version(trt.__version__) < mod.version("9.0") else np.int64 + inp, + dtype=( + np.int32 + if mod.version(trt.__version__) < mod.version("9.0") + else np.int64 + ), ) } ) - assert np.array_equal(outputs["nonzero_out_0"], np.array(expected, dtype=np.int32)) + assert np.array_equal( + outputs["nonzero_out_0"], np.array(expected, dtype=np.int32) + ) @pytest.mark.parametrize("copy_outputs_to_host", [True, False]) @pytest.mark.parametrize("device", ["cpu", "cuda"]) def test_torch_tensors(self, copy_outputs_to_host, identity_engine, device): with TrtRunner(identity_engine) as runner: arr = torch.ones([1, 1, 2, 2], dtype=torch.float32, device=device) - outputs = runner.infer({"x": arr}, copy_outputs_to_host=copy_outputs_to_host) + outputs = runner.infer( + {"x": arr}, copy_outputs_to_host=copy_outputs_to_host + ) assert all(isinstance(t, torch.Tensor) for t in outputs.values()) assert torch.equal(outputs["y"].to("cpu"), arr.to("cpu")) - assert outputs["y"].device.type == ("cpu" if copy_outputs_to_host else "cuda") + assert outputs["y"].device.type == ( + "cpu" if copy_outputs_to_host else "cuda" + ) def test_context(self, identity_engine): with TrtRunner(identity_engine.create_execution_context) as runner: @@ -131,9 +144,15 @@ def test_shape_output(self): model.check_runner(runner) def 
test_multithreaded_runners_from_engine(self, identity_engine): - with TrtRunner(identity_engine) as runner0, TrtRunner(identity_engine) as runner1: - t1 = threading.Thread(target=ONNX_MODELS["identity"].check_runner, args=(runner0,)) - t2 = threading.Thread(target=ONNX_MODELS["identity"].check_runner, args=(runner1,)) + with TrtRunner(identity_engine) as runner0, TrtRunner( + identity_engine + ) as runner1: + t1 = threading.Thread( + target=ONNX_MODELS["identity"].check_runner, args=(runner0,) + ) + t2 = threading.Thread( + target=ONNX_MODELS["identity"].check_runner, args=(runner1,) + ) t1.start() t2.start() t1.join() @@ -141,12 +160,17 @@ def test_multithreaded_runners_from_engine(self, identity_engine): @pytest.mark.parametrize("use_optimization_profile", [True, False]) @pytest.mark.skipif( - mod.version(trt.__version__) >= mod.version("8.6") and mod.version(trt.__version__) < mod.version("8.7"), + mod.version(trt.__version__) >= mod.version("8.6") + and mod.version(trt.__version__) < mod.version("8.7"), reason="Bug in TRT 8.6", ) def test_multiple_profiles(self, use_optimization_profile): model = ONNX_MODELS["dynamic_identity"] - profile0_shapes = [(1, 2, 1, 1), (1, 2, 1, 1), (1, 2, 1, 1)] # Use min==opt==max to fix shapes in the engine. + profile0_shapes = [ + (1, 2, 1, 1), + (1, 2, 1, 1), + (1, 2, 1, 1), + ] # Use min==opt==max to fix shapes in the engine. 
profile1_shapes = [(1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)] profile2_shapes = [(1, 2, 4, 4), (1, 2, 8, 8), (1, 2, 16, 16)] network_loader = NetworkFromOnnxBytes(model.loader) @@ -159,7 +183,9 @@ def test_multiple_profiles(self, use_optimization_profile): engine = engine_from_network(network_loader, config_loader) context = engine.create_execution_context() - for index, shapes in enumerate([profile0_shapes, profile1_shapes, profile2_shapes]): + for index, shapes in enumerate( + [profile0_shapes, profile1_shapes, profile2_shapes] + ): with TrtRunner( context, optimization_profile=index if use_optimization_profile else None, @@ -171,12 +197,13 @@ def test_multiple_profiles(self, use_optimization_profile): for shape in shapes: model.check_runner(runner, {"X": shape}) - @pytest.mark.skipif( mod.version(trt.__version__) < mod.version("10.0"), reason="Feature not present before 10.0", ) - @pytest.mark.parametrize("allocation_strategy", [None, "static", "profile", "runtime"]) + @pytest.mark.parametrize( + "allocation_strategy", [None, "static", "profile", "runtime"] + ) def test_allocation_strategies(self, allocation_strategy): model = ONNX_MODELS["residual_block"] profile0_shapes = [(1, 3, 224, 224), (1, 3, 224, 224), (1, 3, 224, 224)] @@ -191,7 +218,9 @@ def test_allocation_strategies(self, allocation_strategy): config_loader = CreateConfig(profiles=profiles) engine = engine_from_network(network_loader, config_loader) - for index, shapes in enumerate([profile0_shapes, profile1_shapes, profile2_shapes]): + for index, shapes in enumerate( + [profile0_shapes, profile1_shapes, profile2_shapes] + ): with TrtRunner( engine, optimization_profile=index, @@ -200,7 +229,6 @@ def test_allocation_strategies(self, allocation_strategy): for shape in shapes: model.check_runner(runner, {"gpu_0/data_0": shape}) - def test_empty_tensor_with_dynamic_input_shape_tensor(self): model = ONNX_MODELS["empty_tensor_expand"] shapes = [(1, 2, 0, 3, 0), (2, 2, 0, 3, 0), (4, 2, 0, 3, 0)] @@ -224,7 
+252,12 @@ def test_empty_tensor_with_dynamic_input_shape_tensor(self): def test_error_on_wrong_name_feed_dict(self, names, err, identity_engine, module): with TrtRunner(identity_engine) as runner: with pytest.raises(PolygraphyException, match=err): - runner.infer({name: module.ones((1, 1, 2, 2), dtype=module.float32) for name in names}) + runner.infer( + { + name: module.ones((1, 1, 2, 2), dtype=module.float32) + for name in names + } + ) @pytest.mark.parametrize("module", [torch, np]) def test_error_on_wrong_dtype_feed_dict(self, identity_engine, module): @@ -238,9 +271,13 @@ def test_error_on_wrong_shape_feed_dict(self, identity_engine, module): with pytest.raises(PolygraphyException, match="incompatible shape."): runner.infer({"x": module.ones((1, 1, 3, 2), dtype=module.float32)}) - @pytest.mark.parametrize("use_view", [True, False]) # We should be able to use DeviceArray in place of DeviceView + @pytest.mark.parametrize( + "use_view", [True, False] + ) # We should be able to use DeviceArray in place of DeviceView def test_device_views(self, use_view, reducable_engine): - with TrtRunner(reducable_engine) as runner, cuda.DeviceArray((1,), dtype=np.float32) as x: + with TrtRunner(reducable_engine) as runner, cuda.DeviceArray( + (1,), dtype=np.float32 + ) as x: x.copy_from(np.ones((1,), dtype=np.float32)) outputs = runner.infer( { @@ -266,36 +303,71 @@ def check(outputs): assert np.all(outputs["y"] == inp) check(runner.infer({"x": inp})) - check(runner.infer({"x": cuda.DeviceArray(shape=inp.shape, dtype=inp.dtype).copy_from(inp)})) + check( + runner.infer( + { + "x": cuda.DeviceArray( + shape=inp.shape, dtype=inp.dtype + ).copy_from(inp) + } + ) + ) torch_outputs = runner.infer({"x": torch.from_numpy(inp)}) check({name: out.numpy() for name, out in torch_outputs.items()}) check(runner.infer({"x": inp})) - @pytest.mark.parametrize("use_view", [True, False]) # We should be able to use DeviceArray in place of DeviceView + @pytest.mark.parametrize( + "use_view", 
[True, False] + ) # We should be able to use DeviceArray in place of DeviceView def test_device_view_dynamic_shapes(self, use_view): model = ONNX_MODELS["dynamic_identity"] profiles = [ Profile().add("X", (1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)), ] - runner = TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader), CreateConfig(profiles=profiles))) + runner = TrtRunner( + EngineFromNetwork( + NetworkFromOnnxBytes(model.loader), CreateConfig(profiles=profiles) + ) + ) with runner, cuda.DeviceArray(shape=(1, 2, 3, 3), dtype=np.float32) as arr: inp = np.random.random_sample(size=(1, 2, 3, 3)).astype(np.float32) arr.copy_from(inp) - outputs = runner.infer({"X": cuda.DeviceView(arr.ptr, arr.shape, arr.dtype) if use_view else arr}) + outputs = runner.infer( + { + "X": ( + cuda.DeviceView(arr.ptr, arr.shape, arr.dtype) + if use_view + else arr + ) + } + ) assert np.all(outputs["Y"] == inp) assert outputs["Y"].shape == (1, 2, 3, 3) def test_cannot_use_device_view_shape_tensor(self): model = ONNX_MODELS["empty_tensor_expand"] - with TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader))) as runner, cuda.DeviceArray( - shape=(5,), dtype=np.int32 if mod.version(trt.__version__) < mod.version("9.0") else np.int64 + with TrtRunner( + EngineFromNetwork(NetworkFromOnnxBytes(model.loader)) + ) as runner, cuda.DeviceArray( + shape=(5,), + dtype=( + np.int32 + if mod.version(trt.__version__) < mod.version("9.0") + else np.int64 + ), ) as arr: - with pytest.raises(PolygraphyException, match="it must reside in host memory"): - runner.infer({"data": np.ones((2, 0, 3, 0), dtype=np.float32), "new_shape": arr}) + with pytest.raises( + PolygraphyException, match="it must reside in host memory" + ): + runner.infer( + {"data": np.ones((2, 0, 3, 0), dtype=np.float32), "new_shape": arr} + ) @pytest.mark.parametrize("hwc_input", [True, False], ids=["hwc_input", "chw_input"]) - @pytest.mark.parametrize("hwc_output", [True, False], ids=["hwc_output", "chw_output"]) + 
@pytest.mark.parametrize( + "hwc_output", [True, False], ids=["hwc_output", "chw_output"] + ) def test_infer_chw_format(self, hwc_input, hwc_output): model = ONNX_MODELS["identity_multi_ch"] inp_shape = model.input_metadata["x"].shape @@ -317,7 +389,9 @@ def test_infer_chw_format(self, hwc_input, hwc_output): outputs = runner.infer({"x": inp}) if hwc_input == hwc_output: # output in CHW/HWC format and similarly shaped assert np.allclose(outputs["y"], inp) - elif not hwc_input and hwc_output: # output in HWC format and shaped (N, H, W, C) + elif ( + not hwc_input and hwc_output + ): # output in HWC format and shaped (N, H, W, C) assert np.allclose(outputs["y"].transpose(0, 3, 1, 2), inp) else: # hwc_input and not hwc_output: output in CHW format and shaped (N, C, H, W) assert np.allclose(outputs["y"].transpose(0, 2, 3, 1), inp) @@ -328,7 +402,9 @@ def test_get_array_on_cpu(self, use_torch): with cuda.DeviceArray.raw(shape) as arr: host_buffers = {} stream = cuda.Stream() - host_arr = _get_array_on_cpu(arr, "test", host_buffers, stream, arr.nbytes, use_torch) + host_arr = _get_array_on_cpu( + arr, "test", host_buffers, stream, arr.nbytes, use_torch + ) if use_torch: assert isinstance(host_arr, torch.Tensor) @@ -345,24 +421,17 @@ def test_weight_streaming(self, budget): network_loader = NetworkFromOnnxBytes(model.loader, strongly_typed=True) config_loader = CreateConfig(weight_streaming=True) engine = engine_from_network(network_loader, config_loader) - + if budget == np.inf: # set to max size - 1 budget = engine.streamable_weights_size - 1 - kwargs = { - "weight_streaming_budget": None, - "weight_streaming_percent": None - } + kwargs = {"weight_streaming_budget": None, "weight_streaming_percent": None} if budget is not None: if 0 < budget <= 1: kwargs["weight_streaming_percent"] = budget * 100 else: kwargs["weight_streaming_budget"] = int(budget) - with TrtRunner( - engine, - optimization_profile=0, - **kwargs - ) as runner: + with TrtRunner(engine, 
optimization_profile=0, **kwargs) as runner: model.check_runner(runner) diff --git a/tools/Polygraphy/tests/backend/trt/test_util.py b/tools/Polygraphy/tests/backend/trt/test_util.py index 1bd9d44c..730e55d6 100644 --- a/tools/Polygraphy/tests/backend/trt/test_util.py +++ b/tools/Polygraphy/tests/backend/trt/test_util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/common/test_datatype.py b/tools/Polygraphy/tests/common/test_datatype.py index c9e5108a..c1515e18 100644 --- a/tools/Polygraphy/tests/common/test_datatype.py +++ b/tools/Polygraphy/tests/common/test_datatype.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/common/test_interface.py b/tools/Polygraphy/tests/common/test_interface.py index 72d731b7..85ea4dcb 100644 --- a/tools/Polygraphy/tests/common/test_interface.py +++ b/tools/Polygraphy/tests/common/test_interface.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/common/test_struct.py b/tools/Polygraphy/tests/common/test_struct.py index 76c95b93..2014a66d 100644 --- a/tools/Polygraphy/tests/common/test_struct.py +++ b/tools/Polygraphy/tests/common/test_struct.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,6 +17,7 @@ from polygraphy.common import TensorMetadata from polygraphy.datatype import DataType + class TestTensorMetadata: def test_str(self): meta = TensorMetadata().add("X", dtype=DataType.FLOAT32, shape=(64, 64)) diff --git a/tools/Polygraphy/tests/comparator/test_comparator.py b/tools/Polygraphy/tests/comparator/test_comparator.py index 720b5549..e5c0e2d6 100644 --- a/tools/Polygraphy/tests/comparator/test_comparator.py +++ b/tools/Polygraphy/tests/comparator/test_comparator.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,9 +24,21 @@ from polygraphy.backend.onnx import GsFromOnnx, OnnxFromBytes from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx from polygraphy.backend.pluginref import PluginRefRunner -from polygraphy.backend.trt import EngineFromNetwork, NetworkFromOnnxBytes, TrtRunner, network_from_onnx_bytes +from polygraphy.backend.trt import ( + EngineFromNetwork, + NetworkFromOnnxBytes, + TrtRunner, + network_from_onnx_bytes, +) from polygraphy.backend.trt.util import get_all_tensors -from polygraphy.comparator import Comparator, CompareFunc, DataLoader, IterationResult, PostprocessFunc, RunResults +from polygraphy.comparator import ( + Comparator, + CompareFunc, + DataLoader, + IterationResult, + PostprocessFunc, + RunResults, +) from polygraphy.exception import PolygraphyException from tests.models.meta import ONNX_MODELS @@ -86,7 +98,9 @@ def test_postprocess(self): onnx_loader = ONNX_MODELS["identity"].loader run_results = Comparator.run([OnnxrtRunner(SessionFromOnnx(onnx_loader))]) # Output shape is (1, 1, 2, 2) - postprocessed = Comparator.postprocess(run_results, postprocess_func=PostprocessFunc.top_k(k=(1, -1))) + postprocessed = Comparator.postprocess( + run_results, postprocess_func=PostprocessFunc.top_k(k=(1, -1)) + ) for _, results in postprocessed.items(): for result in results: for _, output in result.items(): @@ -126,13 +140,17 @@ def test_multirun_outputs_are_different(self): @pytest.mark.parametrize("array_type", [np.array, build_torch]) def test_validate_nan(self, array_type): run_results = RunResults() - run_results["fake-runner"] = [IterationResult(outputs={"x": array_type(np.nan)})] + run_results["fake-runner"] = [ + IterationResult(outputs={"x": array_type(np.nan)}) + ] assert not Comparator.validate(run_results) @pytest.mark.parametrize("array_type", [np.array, build_torch]) def test_validate_inf(self, array_type): run_results 
= RunResults() - run_results["fake-runner"] = [IterationResult(outputs={"x": array_type(np.inf)})] + run_results["fake-runner"] = [ + IterationResult(outputs={"x": array_type(np.inf)}) + ] assert not Comparator.validate(run_results, check_inf=True) def test_dim_param_trt_onnxrt(self): @@ -154,7 +172,7 @@ def test_dim_param_trt_onnxrt(self): mod.version(trt.__version__) < mod.version("10.0"), reason="Feature not present before 10.0", ) - def test_debug_tensors(self): + def test_debug_tensors(self): model = ONNX_MODELS["identity"] builder, network, parser = network_from_onnx_bytes(model.loader) tensor_map = get_all_tensors(network) @@ -165,7 +183,14 @@ def test_debug_tensors(self): run_results = Comparator.run(runners, data_loader=data) for iteration_list in run_results.values(): # There should be 2 outputs, debug tensor "x" and output "y" - assert len(list(iteration_list[0].items())) == 2 - run_results["fake-runner"] = [IterationResult(outputs={"x": np.ones((1, 1, 2, 2), dtype=np.float32), "y": np.ones((1, 1, 2, 2), dtype=np.float32)})] + assert len(list(iteration_list[0].items())) == 2 + run_results["fake-runner"] = [ + IterationResult( + outputs={ + "x": np.ones((1, 1, 2, 2), dtype=np.float32), + "y": np.ones((1, 1, 2, 2), dtype=np.float32), + } + ) + ] compare_func = CompareFunc.simple(check_shapes=True) assert bool(Comparator.compare_accuracy(run_results, compare_func=compare_func)) diff --git a/tools/Polygraphy/tests/comparator/test_compare.py b/tools/Polygraphy/tests/comparator/test_compare.py index 97f255c4..d093d96e 100644 --- a/tools/Polygraphy/tests/comparator/test_compare.py +++ b/tools/Polygraphy/tests/comparator/test_compare.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/comparator/test_data_loader.py b/tools/Polygraphy/tests/comparator/test_data_loader.py index f60fea40..dee4ddab 100644 --- a/tools/Polygraphy/tests/comparator/test_data_loader.py +++ b/tools/Polygraphy/tests/comparator/test_data_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/comparator/test_postprocess.py b/tools/Polygraphy/tests/comparator/test_postprocess.py index c4c2a0d9..a4d82ad6 100644 --- a/tools/Polygraphy/tests/comparator/test_postprocess.py +++ b/tools/Polygraphy/tests/comparator/test_postprocess.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,6 +21,7 @@ build_torch = lambda a, **kwargs: util.array.to_torch(np.array(a, **kwargs)) + @pytest.mark.parametrize("array_type", [np.array, build_torch]) class TestTopK: def test_basic(self, array_type): diff --git a/tools/Polygraphy/tests/comparator/test_struct.py b/tools/Polygraphy/tests/comparator/test_struct.py index bbb720f2..316f8020 100644 --- a/tools/Polygraphy/tests/comparator/test_struct.py +++ b/tools/Polygraphy/tests/comparator/test_struct.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -106,7 +106,9 @@ def test_add_new(self): iter_results = results["custom"] assert len(iter_results) == 1 - assert all(isinstance(iter_result, IterationResult) for iter_result in iter_results) + assert all( + isinstance(iter_result, IterationResult) for iter_result in iter_results + ) def test_add_new_default_name(self): results = RunResults() @@ -115,7 +117,9 @@ def test_add_new_default_name(self): name = results[0][0] iter_results = results[name] assert len(iter_results) == 1 - assert all(isinstance(iter_result, IterationResult) for iter_result in iter_results) + assert all( + isinstance(iter_result, IterationResult) for iter_result in iter_results + ) @pytest.mark.parametrize("module", [torch, np]) diff --git a/tools/Polygraphy/tests/conftest.py b/tools/Polygraphy/tests/conftest.py index b9a80865..610ed04a 100644 --- a/tools/Polygraphy/tests/conftest.py +++ b/tools/Polygraphy/tests/conftest.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -34,7 +34,9 @@ def sandboxed_install_run(virtualenv, script_runner): Packages from the test environment are still usable, but those in the virtual environment take precedence """ - VENV_PYTHONPATH = glob.glob(os.path.join(virtualenv.virtualenv, "lib", "python*", "site-packages"))[0] + VENV_PYTHONPATH = glob.glob( + os.path.join(virtualenv.virtualenv, "lib", "python*", "site-packages") + )[0] class StatusWrapper: def __init__(self, stdout=None, stderr=None, success=None) -> None: @@ -58,7 +60,9 @@ def run_impl(command, cwd=None): status.stdout = sr_status.stdout status.success = sr_status.success else: - sp_status = sp.run(command, cwd=cwd, env=env, stdout=sp.PIPE, stderr=sp.PIPE) + sp_status = sp.run( + command, cwd=cwd, env=env, stdout=sp.PIPE, stderr=sp.PIPE + ) def try_decode(inp): try: @@ -124,7 +128,10 @@ def check_warning(method, warning_expected): metadata = runner.get_input_metadata_impl() runner.infer_impl( { - name: np.ones(shape, dtype=DataType.to_dtype(DataType.from_dtype(dtype), "numpy")) + name: np.ones( + shape, + dtype=DataType.to_dtype(DataType.from_dtype(dtype), "numpy"), + ) for name, (dtype, shape) in metadata.items() } ) @@ -174,10 +181,15 @@ def check(loader): @pytest.fixture() -@pytest.mark.skipif(sys.platform.startswith("win"), reason="Fixture has not been updated to work on Windows") +@pytest.mark.skipif( + sys.platform.startswith("win"), + reason="Fixture has not been updated to work on Windows", +) def nvinfer_lean_path(): lean_library_name = ctypes.util.find_library("nvinfer_lean") - for dirname in os.environ.get("LD_LIBRARY_PATH", "").split(os.path.pathsep) + ["/usr/lib/x86_64-linux-gnu"]: + for dirname in os.environ.get("LD_LIBRARY_PATH", "").split(os.path.pathsep) + [ + "/usr/lib/x86_64-linux-gnu" + ]: path = os.path.join(dirname, lean_library_name) if os.path.exists(path): return path diff --git 
a/tools/Polygraphy/tests/cuda/test_cuda.py b/tools/Polygraphy/tests/cuda/test_cuda.py index 213f8724..7552f2f6 100644 --- a/tools/Polygraphy/tests/cuda/test_cuda.py +++ b/tools/Polygraphy/tests/cuda/test_cuda.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/func/test_func.py b/tools/Polygraphy/tests/func/test_func.py index 3685855a..7ae5eb5d 100644 --- a/tools/Polygraphy/tests/func/test_func.py +++ b/tools/Polygraphy/tests/func/test_func.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -130,7 +130,8 @@ def x(): return 1, 2 with pytest.raises( - PolygraphyException, match=r"Function: y accepts 1 parameter\(s\), but needs to accept 2 parameter\(s\)" + PolygraphyException, + match=r"Function: y accepts 1 parameter\(s\), but needs to accept 2 parameter\(s\)", ): @func.extend(x) @@ -168,7 +169,9 @@ def modify_x(self): self.x = 2 d = Dummy() - with pytest.raises(PolygraphyInternalException, match="was mutated in a constant method"): + with pytest.raises( + PolygraphyInternalException, match="was mutated in a constant method" + ): d.modify_x() def test_cannot_add_attrs(self): @@ -178,5 +181,7 @@ def modify_x(self): self.x = 2 d = Dummy() - with pytest.raises(PolygraphyInternalException, match="was mutated in a constant method"): + with pytest.raises( + PolygraphyInternalException, match="was mutated in a constant method" + ): d.modify_x() diff --git 
a/tools/Polygraphy/tests/helper.py b/tools/Polygraphy/tests/helper.py index e4141c72..629f7ecd 100644 --- a/tools/Polygraphy/tests/helper.py +++ b/tools/Polygraphy/tests/helper.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -28,7 +28,14 @@ "convert": [], "inspect": ["data", "model", "tactics", "capability", "diff-tactics"], "check": ["lint"], - "surgeon": ["extract", "insert", "sanitize", "prune", "weight-strip", "weight-reconstruct"], + "surgeon": [ + "extract", + "insert", + "sanitize", + "prune", + "weight-strip", + "weight-reconstruct", + ], "template": ["trt-network", "trt-config", "onnx-gs"], "debug": ["build", "precision", "reduce", "repeat"], "data": ["to-input"], diff --git a/tools/Polygraphy/tests/logger/test_logger.py b/tools/Polygraphy/tests/logger/test_logger.py index e7652829..226740e0 100644 --- a/tools/Polygraphy/tests/logger/test_logger.py +++ b/tools/Polygraphy/tests/logger/test_logger.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -41,7 +41,10 @@ def test_line_info(self): logger.info("Hello") log_file.seek(0) - assert f"[I] [tests/logger/test_logger.py:{inspect.currentframe().f_lineno - 2}] Hello\n" == log_file.read() + assert ( + f"[I] [tests/logger/test_logger.py:{inspect.currentframe().f_lineno - 5}] Hello\n" + == log_file.read() + ) def test_severity_trie_with_no_default(self): logger = Logger(severity={"backend/trt": 10}) @@ -91,5 +94,12 @@ class TestSeverityTrie: ) def test_get(self, path, sev): # Duplicate slashes should be handled - trie = SeverityTrie({"": 30, "backend/trt": 20, "backend/trt/loader.py": 50, "backend///////onnx": 28}) + trie = SeverityTrie( + { + "": 30, + "backend/trt": 20, + "backend/trt/loader.py": 50, + "backend///////onnx": 28, + } + ) assert trie.get(path) == sev diff --git a/tools/Polygraphy/tests/mod/conftest.py b/tools/Polygraphy/tests/mod/conftest.py index 89e4d02f..4e6a1d13 100644 --- a/tools/Polygraphy/tests/mod/conftest.py +++ b/tools/Polygraphy/tests/mod/conftest.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/mod/test_dependencies.py b/tools/Polygraphy/tests/mod/test_dependencies.py index 859bd1f9..bba06716 100644 --- a/tools/Polygraphy/tests/mod/test_dependencies.py +++ b/tools/Polygraphy/tests/mod/test_dependencies.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -34,7 +34,11 @@ def is_submodule(path): - file_mod = os.path.isfile(path) and path.endswith(".py") and os.path.basename(path) != "__init__.py" + file_mod = ( + os.path.isfile(path) + and path.endswith(".py") + and os.path.basename(path) != "__init__.py" + ) dir_mod = os.path.isdir(path) and os.path.isfile(os.path.join(path, "__init__.py")) return file_mod or dir_mod @@ -148,7 +152,9 @@ def get_colored_version(): ("==1.4.2", "==1.4.2"), ], ) - def test_can_automatically_install_requirements(self, poly_venv, new_ver, expected, preinstall): + def test_can_automatically_install_requirements( + self, poly_venv, new_ver, expected, preinstall + ): poly_venv.env["POLYGRAPHY_AUTOINSTALL_DEPS"] = "1" def get_colored_version(): @@ -211,7 +217,9 @@ def test_ask_before_autoinstall(self, response, should_install, poly_venv): [ poly_venv.python, "-c", - "from polygraphy import mod; " "colored = mod.lazy_import('colored'); " "mod.autoinstall(colored)", + "from polygraphy import mod; " + "colored = mod.lazy_import('colored'); " + "mod.autoinstall(colored)", ], env=poly_venv.env, stdin=sp.PIPE, diff --git a/tools/Polygraphy/tests/mod/test_exporter.py b/tools/Polygraphy/tests/mod/test_exporter.py index 624a84e6..d8686ad3 100644 --- a/tools/Polygraphy/tests/mod/test_exporter.py +++ b/tools/Polygraphy/tests/mod/test_exporter.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -54,7 +54,9 @@ def __init__(self, x): self.x = x def test_funcify_duplicate_parameters_in_call_init(self): - with pytest.raises(AssertionError, match="call_impl and __init__ have the same argument names"): + with pytest.raises( + AssertionError, match="call_impl and __init__ have the same argument names" + ): @mod.export(funcify=True) class DupArgs(BaseLoader): @@ -78,7 +80,10 @@ def call_impl(self): assert "DocstringFunctor" in __all__ assert "docstring_functor" in __all__ - assert docstring_functor.__doc__ == "Immediately evaluated functional variant of :class:`DocstringFunctor` .\n" + assert ( + docstring_functor.__doc__ + == "Immediately evaluated functional variant of :class:`DocstringFunctor` .\n" + ) def test_funcify_functor_no_call_args(self): @mod.export(funcify=True) diff --git a/tools/Polygraphy/tests/mod/test_importer.py b/tools/Polygraphy/tests/mod/test_importer.py index dc269b8c..5c2dcdc2 100644 --- a/tools/Polygraphy/tests/mod/test_importer.py +++ b/tools/Polygraphy/tests/mod/test_importer.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,11 +21,13 @@ from textwrap import dedent import pytest +import tempfile import tensorrt as trt from polygraphy import mod, util from polygraphy.exception import PolygraphyException from polygraphy.mod.importer import _version_ok +common_backend = mod.lazy_import("polygraphy.backend.common") class TestImporter: def test_import_from_script(self): @@ -59,6 +61,41 @@ def load_network(builder, network): assert network.get_layer(0).type == trt.LayerType.IDENTITY assert sys.path == orig_sys_path + def test_import_from_script_same_method_different_modules(self): + module1_script = dedent( + """ + def print_message(): + print(f"msg1::print_message") + return "msg1" + """ + ) + + module2_script = dedent( + """ + def print_message(): + print(f"msg2::print_message") + return "msg2" + """ + ) + + with tempfile.TemporaryDirectory() as tempdir: + os.mkdir(os.path.join(tempdir, "msg1")) + with open(os.path.join(tempdir, "msg1", "msg.py"), "w+") as msg1_msg: + msg1_msg.write(module1_script) + msg1_msg.flush() + os.fsync(msg1_msg.fileno()) + + os.mkdir(os.path.join(tempdir, "msg2")) + with open(os.path.join(tempdir, "msg2", "msg.py"), "w+") as msg2_msg: + msg2_msg.write(module2_script) + msg2_msg.flush() + os.fsync(msg2_msg.fileno()) + + for msg_module in ['msg1', 'msg2']: + msg_loc = os.path.join(tempdir,msg_module,'msg.py') + msg = common_backend.invoke_from_script(msg_loc, "print_message") + assert msg==msg_module + def test_import_non_existent(self): script = dedent( """ diff --git a/tools/Polygraphy/tests/mod/test_util.py b/tools/Polygraphy/tests/mod/test_util.py index beb16268..310665b5 100644 --- a/tools/Polygraphy/tests/mod/test_util.py +++ b/tools/Polygraphy/tests/mod/test_util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/models/make_models.py b/tools/Polygraphy/tests/models/make_models.py index cf3a1e3a..759a5bc0 100644 --- a/tools/Polygraphy/tests/models/make_models.py +++ b/tools/Polygraphy/tests/models/make_models.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,6 +27,7 @@ import onnx_graphsurgeon as gs from meta import ONNX_MODELS from polygraphy.tools.sparse import SparsityPruner + CURDIR = os.path.dirname(__file__) @@ -54,12 +55,16 @@ def sub(self, a, b, **kwargs): @gs.Graph.register() def constant(self, values: gs.Constant, **kwargs): - return self.layer(op="Constant", outputs=["constant_out"], attrs={"value": values}, **kwargs)[0] + return self.layer( + op="Constant", outputs=["constant_out"], attrs={"value": values}, **kwargs + )[0] @gs.Graph.register() def reshape(self, data, shape, **kwargs): - return self.layer(op="Reshape", inputs=[data, shape], outputs=["reshape_out"], **kwargs)[0] + return self.layer( + op="Reshape", inputs=[data, shape], outputs=["reshape_out"], **kwargs + )[0] @gs.Graph.register() @@ -76,28 +81,41 @@ def tile(self, inp, repeats): def nonzero(self, inp, **kwargs): return self.layer(op="NonZero", inputs=[inp], outputs=["nonzero_out"], **kwargs)[0] + # Name range as onnx_range as range is a python built-in function. 
@gs.Graph.register() def onnx_range(self, start, limit, delta, **kwargs): - return self.layer(op="Range", inputs=[start, limit, delta], outputs=["range_out"], **kwargs)[0] + return self.layer( + op="Range", inputs=[start, limit, delta], outputs=["range_out"], **kwargs + )[0] @gs.Graph.register() def cast(self, input, type, **kwargs): - return self.layer(op="Cast", inputs=[input], attrs={"to": type}, outputs=["cast_out"], **kwargs)[0] + return self.layer( + op="Cast", inputs=[input], attrs={"to": type}, outputs=["cast_out"], **kwargs + )[0] @gs.Graph.register() def reduce_max(self, input, keep_dims, **kwargs): return self.layer( - op="ReduceMax", inputs=[input], attrs={"keepdims": keep_dims}, outputs=["reduce_max_out"], **kwargs + op="ReduceMax", + inputs=[input], + attrs={"keepdims": keep_dims}, + outputs=["reduce_max_out"], + **kwargs, )[0] @gs.Graph.register() def conv(self, input, weights, kernel_shape, **kwargs): return self.layer( - op="Conv", inputs=[input, weights], attrs={"kernel_shape": kernel_shape}, outputs=["conv_out"], **kwargs + op="Conv", + inputs=[input, weights], + attrs={"kernel_shape": kernel_shape}, + outputs=["conv_out"], + **kwargs, )[0] @@ -110,42 +128,45 @@ def split(self, inp, split, axis=0): attrs={"axis": axis, "split": split}, ) + @gs.Graph.register() def transpose(self, inp, **kwargs): return self.layer( - op="Transpose", - inputs=[inp], - outputs=["transpose_out"], - **kwargs + op="Transpose", inputs=[inp], outputs=["transpose_out"], **kwargs )[0] + @gs.Graph.register() def quantize_linear(self, inp, y_scale, y_zero_point, **kwargs): return self.layer( op="QuantizeLinear", inputs=[inp, y_scale, y_zero_point], outputs=["quantize_linear_out"], - **kwargs + **kwargs, )[0] + @gs.Graph.register() def dequantize_linear(self, inp, x_scale, x_zero_point, **kwargs): return self.layer( op="DequantizeLinear", inputs=[inp, x_scale, x_zero_point], outputs=["dequantize_linear_out"], - **kwargs + **kwargs, )[0] + def save(graph, model_name): path = 
os.path.join(CURDIR, model_name) print(f"Writing: {path}") onnx.save(gs.export_onnx(graph), path) + def make_sparse(graph): sparsity_pruner = SparsityPruner(gs.export_onnx(graph)) return gs.import_onnx(sparsity_pruner.prune()) + # Generates a model with multiple inputs/outputs: # # X0 Y0 @@ -286,10 +307,16 @@ def make_needs_constraints(): x = gs.Variable("x", shape=(1, 1, SIZE, SIZE), dtype=np.float32) I_rot90 = gs.Constant( - name="I_rot90", values=np.rot90(np.identity(SIZE, dtype=np.float32).reshape((1, 1, SIZE, SIZE))) + name="I_rot90", + values=np.rot90( + np.identity(SIZE, dtype=np.float32).reshape((1, 1, SIZE, SIZE)) + ), ) fp16_max = gs.Constant( - name="fp16_max", values=np.array([np.finfo(np.float16).max], dtype=np.float32).reshape((1, 1, 1, 1)) + name="fp16_max", + values=np.array([np.finfo(np.float16).max], dtype=np.float32).reshape( + (1, 1, 1, 1) + ), ) graph = gs.Graph(inputs=[x]) @@ -318,7 +345,9 @@ def make_needs_constraints(): def make_constant_fold_bloater(): graph = gs.Graph() # Input is 1MiB, tiled to 10MiB - out = graph.tile(np.ones(shape=(1024, 256), dtype=np.float32), repeats=np.array([1, 10])) + out = graph.tile( + np.ones(shape=(1024, 256), dtype=np.float32), repeats=np.array([1, 10]) + ) out.dtype = np.float32 graph.outputs = [out] @@ -404,17 +433,29 @@ def make_multi_output(): def make_unbounded_dds(): input = gs.Variable("Input", shape=(1, 3, 10, 10), dtype=np.float32) graph = gs.Graph(inputs=[input], opset=13) - weights_0 = graph.constant(gs.Constant("Weights_0", values=np.ones((3, 3, 3, 3), dtype=np.float32))) - weights_1 = graph.constant(gs.Constant("Weights_1", values=np.ones((4, 1, 1, 1), dtype=np.float32))) + weights_0 = graph.constant( + gs.Constant("Weights_0", values=np.ones((3, 3, 3, 3), dtype=np.float32)) + ) + weights_1 = graph.constant( + gs.Constant("Weights_1", values=np.ones((4, 1, 1, 1), dtype=np.float32)) + ) conv_0 = graph.conv(input, weights_0, [3, 3], name="Conv_0") reduce_max_0 = graph.reduce_max(conv_0, 
keep_dims=0, name="ReduceMax_0") - cast_0 = graph.cast(reduce_max_0, getattr(onnx.TensorProto, "INT64"), name="Cast_to_int64") - range_0 = graph.onnx_range(np.array(0, dtype=np.int64), cast_0, np.array(1, dtype=np.int64), name="Range") - cast_1 = graph.cast(range_0, getattr(onnx.TensorProto, "FLOAT"), name="Cast_to_float") + cast_0 = graph.cast( + reduce_max_0, getattr(onnx.TensorProto, "INT64"), name="Cast_to_int64" + ) + range_0 = graph.onnx_range( + np.array(0, dtype=np.int64), cast_0, np.array(1, dtype=np.int64), name="Range" + ) + cast_1 = graph.cast( + range_0, getattr(onnx.TensorProto, "FLOAT"), name="Cast_to_float" + ) - reshape_1 = graph.reshape(cast_1, np.array([1, 1, -1, 1], dtype=np.int64), name="Reshape_1") + reshape_1 = graph.reshape( + cast_1, np.array([1, 1, -1, 1], dtype=np.int64), name="Reshape_1" + ) conv_1 = graph.conv(reshape_1, weights_1, [1, 1], name="Conv_1") graph.outputs = [conv_1] @@ -442,7 +483,8 @@ def make_small_matmul(name, dtype, save_sparse=False): save(g, name) if save_sparse: - save(make_sparse(g), 'sparse.'+name) + save(make_sparse(g), "sparse." + name) + make_small_matmul("matmul.onnx", np.float32, save_sparse=True) make_small_matmul("matmul.fp16.onnx", np.float16) @@ -457,14 +499,18 @@ def make_small_conv(name): F = 4 a = gs.Variable("a", shape=(N, C, H, W), dtype=np.float32) g = gs.Graph(inputs=[a], opset=13) - val = np.random.uniform(-3, 3, size=K * C * F * F).reshape((K, C, F, F)).astype(np.float32) + val = ( + np.random.uniform(-3, 3, size=K * C * F * F) + .reshape((K, C, F, F)) + .astype(np.float32) + ) b = gs.Constant("b", values=val) c = g.conv(a, b, (F, F), name="conv") c.dtype = np.float32 g.outputs = [c] save(g, name) - save(make_sparse(g), 'sparse.'+name) + save(make_sparse(g), "sparse." 
+ name) make_small_conv("conv.onnx") @@ -480,13 +526,18 @@ def make_unsorted(): make_unsorted() + + def make_empty(): g = gs.Graph(inputs=[], opset=13) g.outputs = [] save(g, "empty.onnx") + + make_empty() + # Builds a graph that has unused nodes and inputs. # # f e @@ -517,8 +568,11 @@ def make_cleanable(): graph = gs.Graph(nodes=nodes, inputs=[e, f], outputs=[i]) save(graph, "cleanable.onnx") + + make_cleanable() + # Generates a graph with very deranged names # Tests that the unique renaming in lint tool works def make_renamable(): @@ -530,19 +584,26 @@ def make_renamable(): nodes = [ gs.Node(op="Identity", name="", inputs=[a], outputs=[b]), - gs.Node(op="Dropout", name="polygraphy_unnamed_node_0", inputs=[b], outputs=[c]), - gs.Node(op="Identity", name="polygraphy_unnamed_node_0_0", inputs=[c], outputs=[d]), + gs.Node( + op="Dropout", name="polygraphy_unnamed_node_0", inputs=[b], outputs=[c] + ), + gs.Node( + op="Identity", name="polygraphy_unnamed_node_0_0", inputs=[c], outputs=[d] + ), gs.Node(op="Dropout", name="", inputs=[d], outputs=[e]), ] graph = gs.Graph(nodes=nodes, inputs=[a], outputs=[e]) save(graph, "renamable.onnx") + + make_renamable() ####### Generate some invalid models ####### ### Graphs whose errors are data-dependent ### + # Generats an invalid graph with multiple parallel bad nodes. # The graph is invalid due to multiple parallel nodes failing. # This is is the graph: @@ -572,25 +633,34 @@ def make_bad_graph_with_parallel_invalid_nodes(): A = gs.Variable("A", dtype=DTYPE, shape=(1, BAD_DIM)) B = gs.Variable("B", dtype=DTYPE, shape=(4, 4)) - mm_ab_out = graph.matmul(A, B, name="MatMul_0") # This node will fail because A and B are not compatible. + mm_ab_out = graph.matmul( + A, B, name="MatMul_0" + ) # This node will fail because A and B are not compatible. 
C = gs.Variable("C", dtype=DTYPE, shape=(BAD_DIM, 4)) D = gs.Variable("D", dtype=DTYPE, shape=(4, 1)) - add_cd_out = graph.add(C, D, name="Add_0") # This node will fail because C and D are not compatible. + add_cd_out = graph.add( + C, D, name="Add_0" + ) # This node will fail because C and D are not compatible. pre_out_1 = graph.matmul(mm_ab_out, add_cd_out, name="MatMul_2") E = gs.Variable("E", dtype=DTYPE, shape=(1, 4)) F = gs.Variable("F", dtype=DTYPE, shape=(4, 1)) mm_ef_out = graph.matmul(E, F, name="MatMul_1") - mm_ef_out_int64 = graph.cast(mm_ef_out, onnx.TensorProto.INT64, name="cast_to_int64") - + mm_ef_out_int64 = graph.cast( + mm_ef_out, onnx.TensorProto.INT64, name="cast_to_int64" + ) G = gs.Variable("G", dtype=np.int64, shape=(4, 4)) - nz_g_out = graph.nonzero(G, name="NonZero") # `nz_g_out` shape is data-dependent. + nz_g_out = graph.nonzero(G, name="NonZero") # `nz_g_out` shape is data-dependent. - pre_out_2 = graph.matmul(mm_ef_out_int64, nz_g_out, name="MatMul_3") # This node will fail because `mm_ef_out_int64` and `nz_g_out` are not compatible. - pre_out_2_float = graph.cast(pre_out_2, getattr(onnx.TensorProto, "FLOAT"), name="cast_to_float") + pre_out_2 = graph.matmul( + mm_ef_out_int64, nz_g_out, name="MatMul_3" + ) # This node will fail because `mm_ef_out_int64` and `nz_g_out` are not compatible. + pre_out_2_float = graph.cast( + pre_out_2, getattr(onnx.TensorProto, "FLOAT"), name="cast_to_float" + ) out = graph.add(pre_out_1, pre_out_2_float, name="Add_1") out.dtype = DTYPE @@ -600,6 +670,7 @@ def make_bad_graph_with_parallel_invalid_nodes(): save(graph, "bad_graph_with_parallel_invalid_nodes.onnx") + make_bad_graph_with_parallel_invalid_nodes() @@ -620,11 +691,13 @@ def make_bad_graph_with_parallel_invalid_nodes(): # This graph is useful to check whether the error message is caught or not at runtime based on data input. 
# def make_bad_graph_conditionally_invalid(): - X = [[4.0], [3.0]] # shape (2, 1), compatible with Z for MatMul - Y = [2.0, 4.0] # shape (2,), incompatible with Z for MatMul - Z = [[2.0, 4.0]] # shape (1, 2) + X = [[4.0], [3.0]] # shape (2, 1), compatible with Z for MatMul + Y = [2.0, 4.0] # shape (2,), incompatible with Z for MatMul + Z = [[2.0, 4.0]] # shape (1, 2) - cond = gs.Variable("cond", dtype=np.bool_, shape=(1,)) # input to If, True or False based on user input. + cond = gs.Variable( + "cond", dtype=np.bool_, shape=(1,) + ) # input to If, True or False based on user input. graph = gs.Graph(name="bad_graph_conditionally_invalid") @@ -634,18 +707,34 @@ def make_bad_graph_conditionally_invalid(): then_out = gs.Variable("then_out", dtype=np.float32, shape=None) else_out = gs.Variable("else_out", dtype=np.float32, shape=None) - then_const_node = gs.Node(op="Constant", inputs=[], outputs=[then_out], attrs={"value":x}) # node for `then_branch` Graph - else_const_node = gs.Node(op="Constant", inputs=[], outputs=[else_out], attrs={"value":y}) # node for `else_branch` Graph + then_const_node = gs.Node( + op="Constant", inputs=[], outputs=[then_out], attrs={"value": x} + ) # node for `then_branch` Graph + else_const_node = gs.Node( + op="Constant", inputs=[], outputs=[else_out], attrs={"value": y} + ) # node for `else_branch` Graph - then_body = gs.Graph(nodes=[then_const_node], name="then_body", inputs=[], outputs=[then_out]) # Graph for `then_branch` - else_body = gs.Graph(nodes=[else_const_node], name="else_body", inputs=[], outputs=[else_out]) # Graph for `else_branch` + then_body = gs.Graph( + nodes=[then_const_node], name="then_body", inputs=[], outputs=[then_out] + ) # Graph for `then_branch` + else_body = gs.Graph( + nodes=[else_const_node], name="else_body", inputs=[], outputs=[else_out] + ) # Graph for `else_branch` res = gs.Variable("res", dtype=np.float32, shape=None) # shape is data-dependent - if_node = gs.Node(op="If", name="If_Node", inputs=[cond], 
outputs=[res], attrs={"then_branch":then_body, "else_branch":else_body}) + if_node = gs.Node( + op="If", + name="If_Node", + inputs=[cond], + outputs=[res], + attrs={"then_branch": then_body, "else_branch": else_body}, + ) graph.nodes = [if_node] - out = graph.matmul(res, gs.Constant("z", values=np.array(Z, dtype=np.float32)), name="MatMul") + out = graph.matmul( + res, gs.Constant("z", values=np.array(Z, dtype=np.float32)), name="MatMul" + ) out.dtype = np.float32 graph.inputs = [cond] @@ -653,12 +742,14 @@ def make_bad_graph_conditionally_invalid(): save(graph, "bad_graph_conditionally_invalid.onnx") + make_bad_graph_conditionally_invalid() ### Bad GraphProto ### ### Graphs that break the ONNX Specification for GraphProto ### + # Generates a model where the GraphProto has no name. # # This is invalid as ONNX Specification requires that the GraphProto has a name. @@ -679,6 +770,7 @@ def make_bad_graph_with_no_name(): make_bad_graph_with_no_name() + # Generates a model where the GraphProto has no imports. # # This is invalid as ONNX Specification requires that the GraphProto has at least one import. @@ -699,6 +791,7 @@ def make_bad_graph_with_no_import_domains(): make_bad_graph_with_no_import_domains() + # Generates a model where the inputs (value info) of graph are duplicates. # # This is invalid as ONNX Specification requires that the (value info) inputs of a graph are unique. @@ -735,16 +828,18 @@ def make_bad_graph_multi_level_errors(): inp1 = gs.Variable("inp1", dtype=DTYPE, shape=SHAPE) inp2 = gs.Variable("inp2", dtype=DTYPE, shape=SHAPE) - graph = gs.Graph(inputs=[inp1, inp2], name="") # graph-level error: empty name - out = graph.matmul(inp1, inp2) # node-level error: incompatible inputs + graph = gs.Graph(inputs=[inp1, inp2], name="") # graph-level error: empty name + out = graph.matmul(inp1, inp2) # node-level error: incompatible inputs out.dtype = DTYPE - out.shape = [] # we need to specify this so GS creates valid ONNX model. 
+ out.shape = [] # we need to specify this so GS creates valid ONNX model. graph.outputs = [out] save(graph, "bad_graph_with_multi_level_errors.onnx") + make_bad_graph_multi_level_errors() + # Generates a model where graph has multiple node names with same non-empty string. def make_bad_graph_with_duplicate_node_names(): DTYPE = np.float32 @@ -754,12 +849,17 @@ def make_bad_graph_with_duplicate_node_names(): graph = gs.Graph(inputs=[inp], name="bad_graph_with_duplicate_node_names") inter1 = graph.identity(inp, name="identical") - out = graph.identity(inter1, name="identical") # node-level error: duplicate node names + out = graph.identity( + inter1, name="identical" + ) # node-level error: duplicate node names graph.outputs = [out] save(graph, "bad_graph_with_duplicate_node_names.onnx") + + make_bad_graph_with_duplicate_node_names() + # Generates a model where the graph has a subgraph matching toyPlugin's graph pattern def make_graph_with_subgraph_matching_toy_plugin(): i0 = gs.Variable(name="i0", dtype=np.float32) @@ -774,15 +874,22 @@ def make_graph_with_subgraph_matching_toy_plugin(): O_node = gs.Node(op="O", inputs=[i0], outputs=[i1], name="n1") A_node = gs.Node(op="A", inputs=[i1], outputs=[i2], name="n2") B_node = gs.Node(op="B", inputs=[i1], outputs=[i3], name="n3") - C_node = gs.Node(op="C", inputs=[i2,i3], outputs=[i4], attrs={"x":1}, name="n4") + C_node = gs.Node(op="C", inputs=[i2, i3], outputs=[i4], attrs={"x": 1}, name="n4") D_node = gs.Node(op="D", inputs=[i4], outputs=[o1], name="n5") E_node = gs.Node(op="E", inputs=[i4], outputs=[o2], name="n6") - graph = gs.Graph(nodes=[O_node, A_node, B_node, C_node, D_node, E_node], inputs=[i0], outputs=[o1,o2]) + graph = gs.Graph( + nodes=[O_node, A_node, B_node, C_node, D_node, E_node], + inputs=[i0], + outputs=[o1, o2], + ) + + save(graph, "toy_subgraph.onnx") + - save(graph, "graph_with_subgraph_matching_toy_plugin.onnx") make_graph_with_subgraph_matching_toy_plugin() + # Generates the following Graph # # The 
input to the Transpose op is an initializer @@ -808,8 +915,10 @@ def make_transpose_matmul(): save(g, "transpose_matmul.onnx") + make_transpose_matmul() + # Generates the following Graph # # The input to the QuantizeLinear op is an initializer @@ -823,7 +932,11 @@ def make_transpose_matmul(): # out # def make_qdq_conv(): - x = np.random.uniform(-3, 3, size=3*3*130).astype(np.float32).reshape((1, 3, 3, 130)) + x = ( + np.random.uniform(-3, 3, size=3 * 3 * 130) + .astype(np.float32) + .reshape((1, 3, 3, 130)) + ) y_scale = np.array([2, 4, 5], dtype=np.float32) y_zero_point = np.array([84, 24, 196], dtype=np.uint8) x_const = gs.Constant("x", values=x) @@ -841,14 +954,17 @@ def make_qdq_conv(): save(g, "qdq_conv.onnx") + make_qdq_conv() + def make_weightless_network(model_name): ipath = ONNX_MODELS[model_name].path opath = os.path.join(CURDIR, "weightless." + model_name + ".onnx") cmd = [f"polygraphy surgeon weight-strip {ipath} -o {opath}"] subprocess.run(cmd, shell=True) + make_weightless_network("matmul.fp16") make_weightless_network("matmul.bf16") make_weightless_network("sparse.matmul") diff --git a/tools/Polygraphy/tests/models/meta.py b/tools/Polygraphy/tests/models/meta.py index d0ed444d..04d8217b 100644 --- a/tools/Polygraphy/tests/models/meta.py +++ b/tools/Polygraphy/tests/models/meta.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -35,7 +35,9 @@ def model_path(name=None): class Model: - def __init__(self, path, LoaderType, check_runner, input_metadata=None, ext_data=None): + def __init__( + self, path, LoaderType, check_runner, input_metadata=None, ext_data=None + ): self.path = path self.loader = LoaderType(self.path) self.check_runner = check_runner @@ -44,14 +46,21 @@ def __init__(self, path, LoaderType, check_runner, input_metadata=None, ext_data def check_tf_identity(runner): - feed_dict = {"Input:0": np.random.random_sample(size=(1, 15, 25, 30)).astype(np.float32)} + feed_dict = { + "Input:0": np.random.random_sample(size=(1, 15, 25, 30)).astype(np.float32) + } outputs = runner.infer(feed_dict) assert np.all(outputs["Identity_2:0"] == feed_dict["Input:0"]) + MODELS_DIR = os.path.join(os.path.dirname(__file__)) TF_MODELS = { - "identity": Model(path=model_path("tf_identity.pb"), LoaderType=GraphFromFrozen, check_runner=check_tf_identity), + "identity": Model( + path=model_path("tf_identity.pb"), + LoaderType=GraphFromFrozen, + check_runner=check_tf_identity, + ), } @@ -77,7 +86,14 @@ def check_empty_tensor_expand(runner, shapes): shape = shapes["new_shape"] feed_dict = { "data": np.zeros(shape=(2, 0, 3, 0), dtype=np.float32), - "new_shape": np.array(shape, dtype=np.int32 if mod.version(trt.__version__) < mod.version("9.0") else np.int64), + "new_shape": np.array( + shape, + dtype=( + np.int32 + if mod.version(trt.__version__) < mod.version("9.0") + else np.int64 + ), + ), } outputs = runner.infer(feed_dict) # Empty tensor will still be empty after broadcast @@ -91,16 +107,24 @@ def check_reshape(runner): assert np.all(outputs["output"] == feed_dict["data"].ravel()) -def check_residual_block(runner,shapes): - feed_dict = {"gpu_0/data_0": np.random.random_sample(size=shapes["gpu_0/data_0"]).astype(np.float32)} +def check_residual_block(runner, shapes): + feed_dict = { + 
"gpu_0/data_0": np.random.random_sample(size=shapes["gpu_0/data_0"]).astype( + np.float32 + ) + } # Confirm inference can go through without error outputs = runner.infer(feed_dict) + def check_matmul_2layer(runner, shape=(2, 8)): - feed_dict = {"onnx::MatMul_0": np.random.random_sample(size=shape).astype(np.float32)} + feed_dict = { + "onnx::MatMul_0": np.random.random_sample(size=shape).astype(np.float32) + } # Confirm inference can go through without error outputs = runner.infer(feed_dict) + def no_check_implemented(runner): raise NotImplementedError("No check_runner implemented for this model") @@ -110,42 +134,76 @@ def no_check_implemented(runner): path=model_path("identity.onnx"), LoaderType=BytesFromPath, check_runner=check_identity, - input_metadata=TensorMetadata().add("x", dtype=DataType.FLOAT32, shape=(1, 1, 2, 2)), + input_metadata=TensorMetadata().add( + "x", dtype=DataType.FLOAT32, shape=(1, 1, 2, 2) + ), ), "identity_identity": Model( - path=model_path("identity_identity.onnx"), LoaderType=BytesFromPath, check_runner=check_identity_identity + path=model_path("identity_identity.onnx"), + LoaderType=BytesFromPath, + check_runner=check_identity_identity, ), "dynamic_identity": Model( path=model_path("dynamic_identity.onnx"), LoaderType=BytesFromPath, check_runner=check_dynamic_identity, - input_metadata=TensorMetadata().add("X", dtype=DataType.FLOAT32, shape=(1, 1, -1, -1)), + input_metadata=TensorMetadata().add( + "X", dtype=DataType.FLOAT32, shape=(1, 1, -1, -1) + ), ), "identity_multi_ch": Model( path=model_path("identity_multi_ch.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented, - input_metadata=TensorMetadata().add("x", dtype=DataType.FLOAT32, shape=(2, 4, 3, 3)), + input_metadata=TensorMetadata().add( + "x", dtype=DataType.FLOAT32, shape=(2, 4, 3, 3) + ), ), "empty_tensor_expand": Model( - path=model_path("empty_tensor_expand.onnx"), LoaderType=BytesFromPath, check_runner=check_empty_tensor_expand + 
path=model_path("empty_tensor_expand.onnx"), + LoaderType=BytesFromPath, + check_runner=check_empty_tensor_expand, + ), + "and": Model( + path=model_path("and.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, + ), + "scan": Model( + path=model_path("scan.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), - "and": Model(path=model_path("and.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented), - "scan": Model(path=model_path("scan.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented), "pow_scalar": Model( - path=model_path("pow_scalar.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("pow_scalar.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, + ), + "dim_param": Model( + path=model_path("dim_param.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), - "dim_param": Model(path=model_path("dim_param.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented), "tensor_attr": Model( - path=model_path("tensor_attr.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("tensor_attr.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "identity_with_initializer": Model( - path=model_path("identity_with_initializer.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("identity_with_initializer.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "const_foldable": Model( - path=model_path("const_foldable.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("const_foldable.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, + ), + "reshape": Model( + path=model_path("reshape.onnx"), + LoaderType=BytesFromPath, + check_runner=check_reshape, ), - "reshape": Model(path=model_path("reshape.onnx"), LoaderType=BytesFromPath, check_runner=check_reshape), 
"reducable": Model( path=model_path("reducable.onnx"), LoaderType=BytesFromPath, @@ -172,142 +230,226 @@ def no_check_implemented(runner): ext_data=model_path("ext_weights_same_dir"), ), "capability": Model( - path=model_path("capability.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("capability.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "instancenorm": Model( - path=model_path("instancenorm.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("instancenorm.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "add_with_dup_inputs": Model( - path=model_path("add_with_dup_inputs.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("add_with_dup_inputs.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "needs_constraints": Model( path=model_path("needs_constraints.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented, - input_metadata=TensorMetadata().add("x", dtype=DataType.FLOAT32, shape=(1, 1, 256, 256)), + input_metadata=TensorMetadata().add( + "x", dtype=DataType.FLOAT32, shape=(1, 1, 256, 256) + ), ), "constant_fold_bloater": Model( path=model_path("constant_fold_bloater.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented, ), - "renamable" : Model( + "renamable": Model( path=model_path("renamable.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented, ), - "cleanable" : Model( + "cleanable": Model( path=model_path("cleanable.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented, ), - "nonzero": Model(path=model_path("nonzero.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented), + "nonzero": Model( + path=model_path("nonzero.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, + ), "inp_dim_val_not_set": Model( - path=model_path("inp_dim_val_not_set.onnx"), LoaderType=BytesFromPath, 
check_runner=no_check_implemented + path=model_path("inp_dim_val_not_set.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "multi_output": Model( - path=model_path("multi_output.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("multi_output.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "unbounded_dds": Model( - path=model_path("unbounded_dds.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("unbounded_dds.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "loop": Model( - path=model_path("loop.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("loop.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "matmul.fp16": Model( - path=model_path("matmul.fp16.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("matmul.fp16.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "matmul": Model( - path=model_path("matmul.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("matmul.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "sparse.matmul": Model( - path=model_path("sparse.matmul.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("sparse.matmul.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "matmul.bf16": Model( - path=model_path("matmul.bf16.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("matmul.bf16.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "matmul.bf16.i32data": Model( - path=model_path("matmul.bf16.i32data.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("matmul.bf16.i32data.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "matmul_2layer": Model( 
- path=model_path("matmul_2layer.onnx"), LoaderType=BytesFromPath, check_runner=check_matmul_2layer + path=model_path("matmul_2layer.onnx"), + LoaderType=BytesFromPath, + check_runner=check_matmul_2layer, + ), + "unsorted": Model( + path=model_path("unsorted.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), - "unsorted": Model(path=model_path("unsorted.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented), "conv": Model( - path=model_path("conv.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("conv.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "sparse.conv": Model( - path=model_path("sparse.conv.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("sparse.conv.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "no_op_reshape": Model( - path=model_path("no_op_reshape.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("no_op_reshape.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "bad_graph_with_dup_value_info": Model( - path=model_path("bad_graph_with_dup_value_info.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("bad_graph_with_dup_value_info.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "bad_graph_with_no_name": Model( - path=model_path("bad_graph_with_no_name.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("bad_graph_with_no_name.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "bad_graph_with_no_import_domains": Model( - path=model_path("bad_graph_with_no_import_domains.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("bad_graph_with_no_import_domains.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "bad_graph_with_parallel_invalid_nodes": 
Model( - path=model_path("bad_graph_with_parallel_invalid_nodes.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("bad_graph_with_parallel_invalid_nodes.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "bad_graph_conditionally_invalid": Model( - path=model_path("bad_graph_conditionally_invalid.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("bad_graph_conditionally_invalid.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "custom_op_node": Model( - path=model_path("custom_op_node.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("custom_op_node.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "bad_graph_with_duplicate_node_names": Model( - path=model_path("bad_graph_with_duplicate_node_names.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("bad_graph_with_duplicate_node_names.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "bad_graph_with_multi_level_errors": Model( - path=model_path("bad_graph_with_multi_level_errors.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("bad_graph_with_multi_level_errors.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "invalid": Model( - path=model_path("invalid.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("invalid.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "empty": Model( - path=model_path("empty.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("empty.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "residual_block": Model( - path=model_path("residual_block.onnx"), LoaderType=BytesFromPath, check_runner=check_residual_block + path=model_path("residual_block.onnx"), + 
LoaderType=BytesFromPath, + check_runner=check_residual_block, ), "graph_with_subgraph_matching_toy_plugin": Model( - path=model_path("graph_with_subgraph_matching_toy_plugin.onnx"), - LoaderType=BytesFromPath, - check_runner=no_check_implemented + path=model_path("toy_subgraph.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "transpose_matmul": Model( - path=model_path("transpose_matmul.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("transpose_matmul.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "qdq_conv": Model( - path=model_path("qdq_conv.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("qdq_conv.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "weightless.matmul.fp16": Model( - path=model_path("weightless.matmul.fp16.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("weightless.matmul.fp16.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "weightless.matmul.bf16": Model( - path=model_path("weightless.matmul.bf16.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("weightless.matmul.bf16.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "weightless.conv": Model( - path=model_path("weightless.conv.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("weightless.conv.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "weightless.sparse.matmul": Model( - path=model_path("weightless.sparse.matmul.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("weightless.sparse.matmul.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "weightless.sparse.conv": Model( - path=model_path("weightless.sparse.conv.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + 
path=model_path("weightless.sparse.conv.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "weightless.transpose_matmul": Model( - path=model_path("weightless.transpose_matmul.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("weightless.transpose_matmul.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "weightless.qdq_conv": Model( - path=model_path("weightless.qdq_conv.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("weightless.qdq_conv.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), "roialign": Model( - path=model_path("roialign.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + path=model_path("roialign.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, ), } diff --git a/tools/Polygraphy/tests/models/plugins/toyPlugin/pattern.py b/tools/Polygraphy/tests/models/plugins/toyPlugin/pattern.py index 6cf600ba..0c299e13 100644 --- a/tools/Polygraphy/tests/models/plugins/toyPlugin/pattern.py +++ b/tools/Polygraphy/tests/models/plugins/toyPlugin/pattern.py @@ -1,7 +1,8 @@ from polygraphy import mod +from typing import List,Dict gs = mod.lazy_import("onnx_graphsurgeon>=0.5.0") -def get_plugin_pattern() -> gs.GraphPattern: +def get_plugin_pattern(): """ Toy plugin pattern: A B @@ -23,9 +24,25 @@ def get_plugin_pattern() -> gs.GraphPattern: return pattern -def get_plugin_attributes(sg) -> dict: - """ - example plugin attribute mapping, where the plugin has attribute ToyX, which gets its value from C.x * 2 - """ - return {"ToyX": int(sg.get("Cnode").attrs["x"]) * 2} +def get_matching_subgraphs(graph) -> List[Dict[str,str]]: + gp = get_plugin_pattern() + matches = gp.match_all(graph) + ans = [] + for m in matches: + # save the input and output tensor names of the matching subgraph(s) + input_tensors = list(set([ip_tensor.name for ip_tensor in m.inputs])) + output_tensors = 
list(set([op_tensor.name for op_tensor in m.outputs])) + + attrs = {"ToyX": int(m.get("Cnode").attrs["x"]) * 2} + ioa = { + 'inputs':input_tensors, + 'outputs':output_tensors, + 'attributes':attrs + } + ans.append(ioa) + return ans +def get_plugin_metadata() -> Dict[str,str]: + return {'name':'toyPlugin', + 'op':'CustomToyPlugin', + } diff --git a/tools/Polygraphy/tests/models/graph_with_subgraph_matching_toy_plugin.onnx b/tools/Polygraphy/tests/models/toy_subgraph.onnx similarity index 100% rename from tools/Polygraphy/tests/models/graph_with_subgraph_matching_toy_plugin.onnx rename to tools/Polygraphy/tests/models/toy_subgraph.onnx diff --git a/tools/Polygraphy/tests/test_deprecated_aliases.py b/tools/Polygraphy/tests/test_deprecated_aliases.py index dac09083..bedd2936 100644 --- a/tools/Polygraphy/tests/test_deprecated_aliases.py +++ b/tools/Polygraphy/tests/test_deprecated_aliases.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/test_examples.py b/tools/Polygraphy/tests/test_examples.py index a500e230..2333e2fd 100644 --- a/tools/Polygraphy/tests/test_examples.py +++ b/tools/Polygraphy/tests/test_examples.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -354,7 +354,8 @@ def test_api_examples(example, sandboxed_install_run): ), # Plugin Example( - ["cli", "plugin", "01_match_and_replace_plugin"], artifact_names=["config.yaml", "replaced.onnx"] + ["cli", "plugin", "01_match_and_replace_plugin"], + artifact_names=["config.yaml", "replaced.onnx"] ), ] diff --git a/tools/Polygraphy/tests/test_packaging.py b/tools/Polygraphy/tests/test_packaging.py index ef556620..135181fb 100644 --- a/tools/Polygraphy/tests/test_packaging.py +++ b/tools/Polygraphy/tests/test_packaging.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,7 +30,9 @@ def test_install(self, virtualenv): virtualenv.run(["python3", "-c", "import polygraphy"]) # Newer versions of setuptools break pytest-virtualenv - virtualenv.run([virtualenv.python, "-m", "pip", "install", "setuptools==59.6.0"]) + virtualenv.run( + [virtualenv.python, "-m", "pip", "install", "setuptools==59.6.0"] + ) virtualenv.run(["make", "install"], cwd=ROOT_DIR) @@ -44,8 +46,12 @@ def test_install(self, virtualenv): assert not os.path.exists(os.path.join(poly_pkg.source_path, "tests")) EXCLUDE_FILES = ["__pycache__"] - all_poly_files = glob.glob(os.path.join(poly_pkg.source_path, "polygraphy", "*")) - all_poly_files = [f for f in map(os.path.basename, all_poly_files) if f not in EXCLUDE_FILES] + all_poly_files = glob.glob( + os.path.join(poly_pkg.source_path, "polygraphy", "*") + ) + all_poly_files = [ + f for f in map(os.path.basename, all_poly_files) if f not in EXCLUDE_FILES + ] # NOTE: This should be updated when new files are added to the top-level package. 
EXPECTED_FILES = set( diff --git a/tools/Polygraphy/tests/test_tests.py b/tools/Polygraphy/tests/test_tests.py index a621f100..a077e4c8 100644 --- a/tools/Polygraphy/tests/test_tests.py +++ b/tools/Polygraphy/tests/test_tests.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,7 +22,9 @@ def test_sandboxed_install_run(sandboxed_install_run): - status = sandboxed_install_run(["python3", "-c", "import colored; print(colored.__path__)"]) + status = sandboxed_install_run( + ["python3", "-c", "import colored; print(colored.__path__)"] + ) assert status.success original_path = status.stdout @@ -30,7 +32,9 @@ def test_sandboxed_install_run(sandboxed_install_run): status = sandboxed_install_run(["python3", "-m", "pip", "install", "colored"]) assert status.success - status = sandboxed_install_run(["python3", "-c", "import colored; print(colored.__path__)"]) + status = sandboxed_install_run( + ["python3", "-c", "import colored; print(colored.__path__)"] + ) assert status.success venv_path = status.stdout diff --git a/tools/Polygraphy/tests/test_ux.py b/tools/Polygraphy/tests/test_ux.py index 67d08d31..82c10047 100644 --- a/tools/Polygraphy/tests/test_ux.py +++ b/tools/Polygraphy/tests/test_ux.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -50,7 +50,9 @@ def test_links_valid(self, readme): if link.startswith("https://"): assert requests.get(link).status_code == 200 else: - assert os.path.pathsep * 2 not in link, "Duplicate slashes break links in GitHub" + assert ( + os.path.pathsep * 2 not in link + ), "Duplicate slashes break links in GitHub" # NOTE: We cannot use repo-root-relative links in Markdown since Polygraphy is also # a subfolder of the OSS repo. assert not link.startswith("/") diff --git a/tools/Polygraphy/tests/tools/args/backend/onnx/test_loader.py b/tools/Polygraphy/tests/tools/args/backend/onnx/test_loader.py index 4011056e..23cfa1ef 100644 --- a/tools/Polygraphy/tests/tools/args/backend/onnx/test_loader.py +++ b/tools/Polygraphy/tests/tools/args/backend/onnx/test_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,7 +23,13 @@ import onnx_graphsurgeon as gs from polygraphy import util from polygraphy.backend.onnx import onnx_from_path -from polygraphy.tools.args import DataLoaderArgs, ModelArgs, OnnxLoadArgs, OnnxSaveArgs, OnnxInferShapesArgs +from polygraphy.tools.args import ( + DataLoaderArgs, + ModelArgs, + OnnxLoadArgs, + OnnxSaveArgs, + OnnxInferShapesArgs, +) from polygraphy.tools.script import Script from tests.helper import is_file_empty, is_file_non_empty from tests.models.meta import ONNX_MODELS @@ -38,8 +44,12 @@ def _check_ext_weights_model(model): class TestOnnxLoaderArgs: def test_basic(self): - arg_group = ArgGroupTestHelper(OnnxLoadArgs(), deps=[ModelArgs(), OnnxInferShapesArgs()]) - arg_group.parse_args([ONNX_MODELS["identity_identity"].path, "--onnx-outputs=identity_out_0"]) + arg_group = ArgGroupTestHelper( + OnnxLoadArgs(), deps=[ModelArgs(), OnnxInferShapesArgs()] + ) + arg_group.parse_args( + [ONNX_MODELS["identity_identity"].path, "--onnx-outputs=identity_out_0"] + ) model = arg_group.load_onnx() assert len(model.graph.output) == 1 @@ -48,7 +58,10 @@ def test_basic(self): @pytest.mark.parametrize("global_upper_bound", [None, "2000"]) @pytest.mark.parametrize("specified_upper_bound", [None, "cast_out_6:4000"]) def test_setting_upper_bounds(self, global_upper_bound, specified_upper_bound): - arg_group = ArgGroupTestHelper(OnnxLoadArgs(allow_setting_upper_bounds = True), deps=[ModelArgs(), OnnxInferShapesArgs()]) + arg_group = ArgGroupTestHelper( + OnnxLoadArgs(allow_setting_upper_bounds=True), + deps=[ModelArgs(), OnnxInferShapesArgs()], + ) cmd = [ONNX_MODELS["unbounded_dds"].path, "--set-unbounded-dds-upper-bound"] upper_bound = "1000" @@ -67,7 +80,7 @@ def test_setting_upper_bounds(self, global_upper_bound, specified_upper_bound): # Check if there is a Min operator in the modified model find_min = False for node in graph.nodes: - if node.op 
== 'Min': + if node.op == "Min": find_min = True # Check if the Min operator's second input is a constant tensor assert isinstance(node.inputs[1], gs.Constant) @@ -75,17 +88,21 @@ def test_setting_upper_bounds(self, global_upper_bound, specified_upper_bound): val = node.inputs[1].values # Check if the constant value equals the target upper bound assert str(val) == upper_bound - assert (find_min) + assert find_min def test_external_data(self): - arg_group = ArgGroupTestHelper(OnnxLoadArgs(), deps=[ModelArgs(), OnnxInferShapesArgs()]) + arg_group = ArgGroupTestHelper( + OnnxLoadArgs(), deps=[ModelArgs(), OnnxInferShapesArgs()] + ) model = ONNX_MODELS["ext_weights"] arg_group.parse_args([model.path, "--external-data-dir", model.ext_data]) model = arg_group.load_onnx() _check_ext_weights_model(model) def test_ignore_external_data(self): - arg_group = ArgGroupTestHelper(OnnxLoadArgs(), deps=[ModelArgs(), OnnxInferShapesArgs()]) + arg_group = ArgGroupTestHelper( + OnnxLoadArgs(), deps=[ModelArgs(), OnnxInferShapesArgs()] + ) model = ONNX_MODELS["ext_weights"] arg_group.parse_args([model.path, "--ignore-external-data"]) model = arg_group.load_onnx() @@ -94,10 +111,13 @@ def test_ignore_external_data(self): @pytest.mark.parametrize("allow_onnxruntime", [True, False]) def test_shape_inference(self, allow_onnxruntime): # When using shape inference, we should load directly from the path - arg_group = ArgGroupTestHelper(OnnxLoadArgs(), deps=[ModelArgs(), OnnxInferShapesArgs()]) + arg_group = ArgGroupTestHelper( + OnnxLoadArgs(), deps=[ModelArgs(), OnnxInferShapesArgs()] + ) model = ONNX_MODELS["identity"] arg_group.parse_args( - [model.path, "--shape-inference"] + (["--no-onnxruntime-shape-inference"] if not allow_onnxruntime else []) + [model.path, "--shape-inference"] + + (["--no-onnxruntime-shape-inference"] if not allow_onnxruntime else []) ) assert arg_group.must_use_onnx_loader() @@ -114,7 +134,9 @@ def test_shape_inference(self, allow_onnxruntime): 
@pytest.mark.parametrize("allow_onnxruntime", [True, False]) def test_shape_inference_ext_data(self, allow_onnxruntime): - arg_group = ArgGroupTestHelper(OnnxLoadArgs(), deps=[ModelArgs(), OnnxInferShapesArgs()]) + arg_group = ArgGroupTestHelper( + OnnxLoadArgs(), deps=[ModelArgs(), OnnxInferShapesArgs()] + ) model = ONNX_MODELS["ext_weights"] arg_group.parse_args( [model.path, "--external-data-dir", model.ext_data, "--shape-inference"] @@ -139,16 +161,28 @@ def test_shape_inference_ext_data(self, allow_onnxruntime): class TestOnnxSaveArgs: def test_defaults(self): - arg_group = ArgGroupTestHelper(OnnxSaveArgs(), deps=[ModelArgs(), OnnxLoadArgs(allow_shape_inference=False)]) + arg_group = ArgGroupTestHelper( + OnnxSaveArgs(), + deps=[ModelArgs(), OnnxLoadArgs(allow_shape_inference=False)], + ) arg_group.parse_args([]) assert arg_group.size_threshold is None def test_external_data(self): model = onnx_from_path(ONNX_MODELS["const_foldable"].path) - arg_group = ArgGroupTestHelper(OnnxSaveArgs(), deps=[ModelArgs(), OnnxLoadArgs(allow_shape_inference=False)]) + arg_group = ArgGroupTestHelper( + OnnxSaveArgs(), + deps=[ModelArgs(), OnnxLoadArgs(allow_shape_inference=False)], + ) with util.NamedTemporaryFile() as path, util.NamedTemporaryFile() as data: arg_group.parse_args( - ["-o", path.name, "--save-external-data", data.name, "--external-data-size-threshold=0"] + [ + "-o", + path.name, + "--save-external-data", + data.name, + "--external-data-size-threshold=0", + ] ) arg_group.save_onnx(model) @@ -157,10 +191,19 @@ def test_external_data(self): def test_size_threshold(self): model = onnx_from_path(ONNX_MODELS["const_foldable"].path) - arg_group = ArgGroupTestHelper(OnnxSaveArgs(), deps=[ModelArgs(), OnnxLoadArgs(allow_shape_inference=False)]) + arg_group = ArgGroupTestHelper( + OnnxSaveArgs(), + deps=[ModelArgs(), OnnxLoadArgs(allow_shape_inference=False)], + ) with util.NamedTemporaryFile() as path, util.NamedTemporaryFile() as data: arg_group.parse_args( - ["-o", 
path.name, "--save-external-data", data.name, "--external-data-size-threshold=1024"] + [ + "-o", + path.name, + "--save-external-data", + data.name, + "--external-data-size-threshold=1024", + ] ) arg_group.save_onnx(model) @@ -169,7 +212,10 @@ def test_size_threshold(self): def test_no_all_tensors_to_one_file(self): model = onnx_from_path(ONNX_MODELS["const_foldable"].path) - arg_group = ArgGroupTestHelper(OnnxSaveArgs(), deps=[ModelArgs(), OnnxLoadArgs(allow_shape_inference=False)]) + arg_group = ArgGroupTestHelper( + OnnxSaveArgs(), + deps=[ModelArgs(), OnnxLoadArgs(allow_shape_inference=False)], + ) with tempfile.TemporaryDirectory() as outdir: path = os.path.join(outdir, "model.onnx") arg_group.parse_args( @@ -196,7 +242,10 @@ def test_no_all_tensors_to_one_file(self): ], ) def test_size_threshold_parsing(self, arg, expected): - arg_group = ArgGroupTestHelper(OnnxSaveArgs(), deps=[ModelArgs(), OnnxLoadArgs(allow_shape_inference=False)]) + arg_group = ArgGroupTestHelper( + OnnxSaveArgs(), + deps=[ModelArgs(), OnnxLoadArgs(allow_shape_inference=False)], + ) arg_group.parse_args(["--external-data-size-threshold", arg]) assert arg_group.size_threshold == expected @@ -204,7 +253,8 @@ def test_size_threshold_parsing(self, arg, expected): class TestOnnxShapeInferenceArgs: def test_shape_inference_disabled_on_fallback(self): arg_group = ArgGroupTestHelper( - OnnxInferShapesArgs(default=True, allow_force_fallback=True), deps=[DataLoaderArgs()] + OnnxInferShapesArgs(default=True, allow_force_fallback=True), + deps=[DataLoaderArgs()], ) arg_group.parse_args([]) assert arg_group.do_shape_inference @@ -215,7 +265,10 @@ def test_shape_inference_disabled_on_fallback(self): @pytest.mark.parametrize("allow_onnxruntime", [True, False]) def test_no_onnxruntime_shape_inference(self, allow_onnxruntime): arg_group = ArgGroupTestHelper( - OnnxInferShapesArgs(default=True, allow_force_fallback=True), deps=[DataLoaderArgs()] + OnnxInferShapesArgs(default=True, 
allow_force_fallback=True), + deps=[DataLoaderArgs()], + ) + arg_group.parse_args( + [] if allow_onnxruntime else ["--no-onnxruntime-shape-inference"] ) - arg_group.parse_args([] if allow_onnxruntime else ["--no-onnxruntime-shape-inference"]) assert arg_group.allow_onnxruntime == (None if allow_onnxruntime else False) diff --git a/tools/Polygraphy/tests/tools/args/backend/onnxrt/test_loader.py b/tools/Polygraphy/tests/tools/args/backend/onnxrt/test_loader.py index 5a69a040..aa1dd0bf 100644 --- a/tools/Polygraphy/tests/tools/args/backend/onnxrt/test_loader.py +++ b/tools/Polygraphy/tests/tools/args/backend/onnxrt/test_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,9 +24,12 @@ class TestOnnxrtSessionArgs: def test_execution_providers(self): arg_group = ArgGroupTestHelper( - OnnxrtSessionArgs(), deps=[ModelArgs(), OnnxLoadArgs(allow_shape_inference=False)] + OnnxrtSessionArgs(), + deps=[ModelArgs(), OnnxLoadArgs(allow_shape_inference=False)], + ) + arg_group.parse_args( + [ONNX_MODELS["identity_identity"].path, "--providers", "cpu"] ) - arg_group.parse_args([ONNX_MODELS["identity_identity"].path, "--providers", "cpu"]) sess = arg_group.load_onnxrt_session() assert sess diff --git a/tools/Polygraphy/tests/tools/args/backend/test_runner_select.py b/tools/Polygraphy/tests/tools/args/backend/test_runner_select.py index 0606ebc9..6445134d 100644 --- a/tools/Polygraphy/tests/tools/args/backend/test_runner_select.py +++ b/tools/Polygraphy/tests/tools/args/backend/test_runner_select.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -63,7 +63,11 @@ class TestRunnerSelectArgs: # We should be able to specify the same runner multiple times. ( ["--onnxrt", "--onnxrt", "--onnxrt"], - [("onnxrt", "ONNX-Runtime"), ("onnxrt", "ONNX-Runtime"), ("onnxrt", "ONNX-Runtime")], + [ + ("onnxrt", "ONNX-Runtime"), + ("onnxrt", "ONNX-Runtime"), + ("onnxrt", "ONNX-Runtime"), + ], ), ], ) diff --git a/tools/Polygraphy/tests/tools/args/backend/tf/test_loader.py b/tools/Polygraphy/tests/tools/args/backend/tf/test_loader.py index a297e12e..454b832e 100644 --- a/tools/Polygraphy/tests/tools/args/backend/tf/test_loader.py +++ b/tools/Polygraphy/tests/tools/args/backend/tf/test_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/tools/args/backend/trt/test_config.py b/tools/Polygraphy/tests/tools/args/backend/trt/test_config.py index 32c3943c..ca002f86 100644 --- a/tools/Polygraphy/tests/tools/args/backend/trt/test_config.py +++ b/tools/Polygraphy/tests/tools/args/backend/trt/test_config.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -479,9 +479,11 @@ def test_memory_pool_limits_empty_key_not_allowed(self, args, trt_config_args): @pytest.mark.parametrize( "preview_features", [ - ["PROFILE_SHAriNG_0806"] - if mod.version(trt.__version__) >= mod.version("10.0") - else ["FASter_DYNAMIC_ShAPeS_0805"], + ( + ["PROFILE_SHAriNG_0806"] + if mod.version(trt.__version__) >= mod.version("10.0") + else ["FASter_DYNAMIC_ShAPeS_0805"] + ), ], ) def test_preview_features(self, trt_config_args, preview_features): diff --git a/tools/Polygraphy/tests/tools/args/backend/trt/test_loader.py b/tools/Polygraphy/tests/tools/args/backend/trt/test_loader.py index 9c967532..e8dafedf 100644 --- a/tools/Polygraphy/tests/tools/args/backend/trt/test_loader.py +++ b/tools/Polygraphy/tests/tools/args/backend/trt/test_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -50,10 +50,12 @@ class TestTrtLoadNetworkArgs: @pytest.mark.parametrize("force_onnx_loader", [True, False]) @pytest.mark.parametrize( "opts,expected_flag", - [([], None)] - + [(["--strongly-typed"], trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED)] - if mod.version(trt.__version__) >= mod.version("8.7") - else [], + ( + [([], None)] + + [(["--strongly-typed"], trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED)] + if mod.version(trt.__version__) >= mod.version("8.7") + else [] + ), ) def test_load_network(self, force_onnx_loader, opts, expected_flag): arg_group = ArgGroupTestHelper( diff --git a/tools/Polygraphy/tests/tools/args/backend/trt/test_runner.py b/tools/Polygraphy/tests/tools/args/backend/trt/test_runner.py index fc10831f..f5714c38 100644 --- a/tools/Polygraphy/tests/tools/args/backend/trt/test_runner.py +++ b/tools/Polygraphy/tests/tools/args/backend/trt/test_runner.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -53,7 +53,9 @@ def trt_runner_args(): class TestTrtRunnerArgs: @pytest.mark.parametrize("index", range(0, 3)) def test_optimization_profile(self, trt_runner_args, index): - trt_runner_args.parse_args([ONNX_MODELS["identity"].path, f"--optimization-profile={index}"]) + trt_runner_args.parse_args( + [ONNX_MODELS["identity"].path, f"--optimization-profile={index}"] + ) assert trt_runner_args.optimization_profile == index diff --git a/tools/Polygraphy/tests/tools/args/comparator/test_comparator.py b/tools/Polygraphy/tests/tools/args/comparator/test_comparator.py index 9423cb8c..9eb3bb2d 100644 --- a/tools/Polygraphy/tests/tools/args/comparator/test_comparator.py +++ b/tools/Polygraphy/tests/tools/args/comparator/test_comparator.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -41,12 +41,15 @@ class TestComparatorCompareArgs: ("indices", ["--atol=1"], ["--atol", "--abs-tol"], "simple"), ], ) - def test_compare_func_warnings_for_unused_options(self, compare_func, options, option_names, valid_for): + def test_compare_func_warnings_for_unused_options( + self, compare_func, options, option_names, valid_for + ): outfile = io.StringIO() with contextlib.redirect_stdout(outfile), contextlib.redirect_stderr(outfile): # Keep logger arguments first they're parsed first so we actually write to the log file. 
arg_group = ArgGroupTestHelper( - ComparatorCompareArgs(), deps=[LoggerArgs(), CompareFuncIndicesArgs(), CompareFuncSimpleArgs()] + ComparatorCompareArgs(), + deps=[LoggerArgs(), CompareFuncIndicesArgs(), CompareFuncSimpleArgs()], ) arg_group.parse_args([f"--compare-func={compare_func}"] + options) @@ -54,7 +57,8 @@ def test_compare_func_warnings_for_unused_options(self, compare_func, options, o logging_out = outfile.read() assert ( f"[W] Option: {'/'.join(option_names)} is only valid for comparison function: '{valid_for}'. " - f"The selected comparison function is: '{compare_func}', so this option will be ignored." in logging_out + f"The selected comparison function is: '{compare_func}', so this option will be ignored." + in logging_out ) diff --git a/tools/Polygraphy/tests/tools/args/comparator/test_compare.py b/tools/Polygraphy/tests/tools/args/comparator/test_compare.py index b44cf45d..d370350f 100644 --- a/tools/Polygraphy/tests/tools/args/comparator/test_compare.py +++ b/tools/Polygraphy/tests/tools/args/comparator/test_compare.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,17 +20,24 @@ import pytest from polygraphy.comparator import IterationResult from polygraphy.exception import PolygraphyException -from polygraphy.tools.args import ComparatorCompareArgs, CompareFuncIndicesArgs, CompareFuncSimpleArgs +from polygraphy.tools.args import ( + ComparatorCompareArgs, + CompareFuncIndicesArgs, + CompareFuncSimpleArgs, +) from polygraphy.tools.args import util as args_util from polygraphy.tools.script import Script from tests.tools.args.helper import ArgGroupTestHelper class TestCompareFuncSimple: - @pytest.mark.parametrize("check_error_stat", ["max", "median", "mean", "elemwise", "quantile"]) + @pytest.mark.parametrize( + "check_error_stat", ["max", "median", "mean", "elemwise", "quantile"] + ) def test_error_stat(self, check_error_stat): arg_group = ArgGroupTestHelper( - CompareFuncSimpleArgs(), deps=[ComparatorCompareArgs(), CompareFuncIndicesArgs()] + CompareFuncSimpleArgs(), + deps=[ComparatorCompareArgs(), CompareFuncIndicesArgs()], ) arg_group.parse_args([f"--check-error-stat={check_error_stat}"]) @@ -39,13 +46,20 @@ def test_error_stat(self, check_error_stat): @pytest.mark.parametrize( "args, expected", [ - (["mean", "output0:median", "output1:max"], {"": "mean", "output0": "median", "output1": "max"}), - (["output0:median", "output1:elemwise"], {"output0": "median", "output1": "elemwise"}), + ( + ["mean", "output0:median", "output1:max"], + {"": "mean", "output0": "median", "output1": "max"}, + ), + ( + ["output0:median", "output1:elemwise"], + {"output0": "median", "output1": "elemwise"}, + ), ], ) def test_error_stat_per_output(self, args, expected): arg_group = ArgGroupTestHelper( - CompareFuncSimpleArgs(), deps=[ComparatorCompareArgs(), CompareFuncIndicesArgs()] + CompareFuncSimpleArgs(), + deps=[ComparatorCompareArgs(), CompareFuncIndicesArgs()], ) arg_group.parse_args(["--check-error-stat"] + args) @@ -61,14 
+75,16 @@ def test_error_stat_per_output(self, args, expected): def test_invalid_error_stat(self, args): with pytest.raises(PolygraphyException, match="Invalid choice"): arg_group = ArgGroupTestHelper( - CompareFuncSimpleArgs(), deps=[ComparatorCompareArgs(), CompareFuncIndicesArgs()] + CompareFuncSimpleArgs(), + deps=[ComparatorCompareArgs(), CompareFuncIndicesArgs()], ) arg_group.parse_args(["--check-error-stat"] + args) @pytest.mark.parametrize("val", (np.inf, -np.inf)) def test_infinities_compare_equal(self, val): arg_group = ArgGroupTestHelper( - CompareFuncSimpleArgs(), deps=[ComparatorCompareArgs(), CompareFuncIndicesArgs()] + CompareFuncSimpleArgs(), + deps=[ComparatorCompareArgs(), CompareFuncIndicesArgs()], ) arg_group.parse_args([f"--infinities-compare-equal"]) @@ -85,7 +101,8 @@ class TestCompareFuncIndices: def test_always_adds_to_script(self): # Indices is not the default comparison func, so it should always add itself to the script. arg_group = ArgGroupTestHelper( - CompareFuncIndicesArgs(), deps=[ComparatorCompareArgs(), CompareFuncSimpleArgs()] + CompareFuncIndicesArgs(), + deps=[ComparatorCompareArgs(), CompareFuncSimpleArgs()], ) arg_group.parse_args([]) @@ -104,7 +121,8 @@ def test_default_args_are_none(self, arg_group_type): other_group_types = set(TestDefaultNone.ARG_GROUP_TYPES) other_group_types.remove(arg_group_type) arg_group = ArgGroupTestHelper( - arg_group_type(), deps=[ComparatorCompareArgs()] + [g() for g in other_group_types] + arg_group_type(), + deps=[ComparatorCompareArgs()] + [g() for g in other_group_types], ) assert len(arg_group.arg_group.group._group_actions) > 0 for action in arg_group.arg_group.group._group_actions: diff --git a/tools/Polygraphy/tests/tools/args/comparator/test_data_loader.py b/tools/Polygraphy/tests/tools/args/comparator/test_data_loader.py index 7c4dac90..d9c2a053 100644 --- a/tools/Polygraphy/tests/tools/args/comparator/test_data_loader.py +++ 
b/tools/Polygraphy/tests/tools/args/comparator/test_data_loader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,9 +40,18 @@ class TestDataLoaderArgs: (["--seed=123"], ["seed"], [123]), (["--int-min=23", "--int-max=94"], ["_int_range"], [(23, 94)]), (["--float-min=2.3", "--float-max=9.4"], ["_float_range"], [(2.3, 9.4)]), - ([], ["val_range"], [None], [(0.0, 1.0)]), # When not specified, this should default to None. + ( + [], + ["val_range"], + [None], + [(0.0, 1.0)], + ), # When not specified, this should default to None. (["--val-range", "[0.0,2.3]"], ["val_range"], [{"": (0.0, 2.3)}]), - (["--val-range", "[1,5]"], ["val_range"], [{"": (1, 5)}]), # Should work for integral quantities + ( + ["--val-range", "[1,5]"], + ["val_range"], + [{"": (1, 5)}], + ), # Should work for integral quantities ( ["--val-range", "inp0:[0.0,2.3]", "inp1:[4.5,9.6]"], ["val_range"], @@ -57,9 +66,21 @@ class TestDataLoaderArgs: (["--val-range", "'\"':[0.0,2.3]"], ["val_range"], [{"'\"'": (0.0, 2.3)}]), (["--iterations=12"], ["iterations"], [12]), (["--val-range", "[0.0,inf]"], ["val_range"], [{"": (0.0, float("inf"))}]), - (["--val-range", "[-inf,0.0]"], ["val_range"], [{"": (float("-inf"), 0.0)}]), - (["--data-loader-backend-module", "torch"], ["data_loader_backend_module"], ["torch"]), - (["--data-loader-backend-module", "numpy"], ["data_loader_backend_module"], ["numpy"]), + ( + ["--val-range", "[-inf,0.0]"], + ["val_range"], + [{"": (float("-inf"), 0.0)}], + ), + ( + ["--data-loader-backend-module", "torch"], + ["data_loader_backend_module"], + ["torch"], + ), + ( + ["--data-loader-backend-module", "numpy"], + ["data_loader_backend_module"], + ["numpy"], + ), ], ids=lambda c: c[1][0], ) @@ 
-82,7 +103,9 @@ def test_val_range_nan(self, data_loader_args): assert util.is_nan(val_range[0]) def test_input_metadata(self, data_loader_args): - data_loader_args.parse_args(["--input-shapes", "test0:[1,1,1]", "test1:[2,32,2]"]) + data_loader_args.parse_args( + ["--input-shapes", "test0:[1,1,1]", "test1:[2,32,2]"] + ) data_loader = data_loader_args.get_data_loader() for feed_dict in data_loader: @@ -92,7 +115,9 @@ def test_input_metadata(self, data_loader_args): def test_override_input_metadata(self, data_loader_args): data_loader_args.parse_args([]) data_loader = data_loader_args.get_data_loader( - user_input_metadata=TensorMetadata().add("test0", dtype=np.float32, shape=(4, 4)) + user_input_metadata=TensorMetadata().add( + "test0", dtype=np.float32, shape=(4, 4) + ) ) for feed_dict in data_loader: @@ -118,7 +143,9 @@ def my_load_data(): f.flush() os.fsync(f.fileno()) - data_loader_args.parse_args(["--data-loader-script", f"{f.name}:my_load_data"]) + data_loader_args.parse_args( + ["--data-loader-script", f"{f.name}:my_load_data"] + ) assert data_loader_args.data_loader_script == f.name assert data_loader_args.data_loader_func_name == "my_load_data" @@ -126,13 +153,19 @@ def my_load_data(): data_loader = data_loader_args.get_data_loader() data = list(data_loader) assert len(data) == 5 - assert all(np.all(d["inp"] == np.ones((3, 5), dtype=np.float32) * 6.4341) for d in data) + assert all( + np.all(d["inp"] == np.ones((3, 5), dtype=np.float32) * 6.4341) + for d in data + ) @pytest.mark.parametrize( "opts,expected_err", [ (["--val-range", "x:[y,2]"], "could not be parsed as a number"), - (["--val-range", "x:[1,2,3]"], "expected to receive exactly 2 values, but received 3"), + ( + ["--val-range", "x:[1,2,3]"], + "expected to receive exactly 2 values, but received 3", + ), ], ) def test_val_range_errors(self, data_loader_args, opts, expected_err): @@ -141,4 +174,6 @@ def test_val_range_errors(self, data_loader_args, opts, expected_err): def 
test_cannot_provide_two_custom_data_loader_methods(self, data_loader_args): with pytest.raises(SystemExit): - data_loader_args.parse_args(["--data-loader-script", "my_script.py", "--load-inputs", "inputs.json"]) + data_loader_args.parse_args( + ["--data-loader-script", "my_script.py", "--load-inputs", "inputs.json"] + ) diff --git a/tools/Polygraphy/tests/tools/args/helper.py b/tools/Polygraphy/tests/tools/args/helper.py index 4e9225e0..4742df84 100644 --- a/tools/Polygraphy/tests/tools/args/helper.py +++ b/tools/Polygraphy/tests/tools/args/helper.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/tools/args/logger/test_logger.py b/tools/Polygraphy/tests/tools/args/logger/test_logger.py index 96c4e464..d9f373a0 100644 --- a/tools/Polygraphy/tests/tools/args/logger/test_logger.py +++ b/tools/Polygraphy/tests/tools/args/logger/test_logger.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -56,7 +56,11 @@ def test_get_logger_verbosities(self, case): (["--verbosity", "info"], {None: G_LOGGER.INFO, "backend/": G_LOGGER.INFO}), ( ["--verbosity", "INFO", "backend:VERBOSE"], - {None: G_LOGGER.INFO, "backend": G_LOGGER.VERBOSE, os.path.join("backend", "trt"): G_LOGGER.VERBOSE}, + { + None: G_LOGGER.INFO, + "backend": G_LOGGER.VERBOSE, + os.path.join("backend", "trt"): G_LOGGER.VERBOSE, + }, ), ( ["--verbosity", "ULTRA_VERBOSE", "backend:VERBOSE"], @@ -68,7 +72,11 @@ def test_get_logger_verbosities(self, case): ), ( ["--verbosity", "backend/trt:VERBOSE"], - {None: G_LOGGER.INFO, "backend/": G_LOGGER.INFO, os.path.join("backend", "trt"): G_LOGGER.VERBOSE}, + { + None: G_LOGGER.INFO, + "backend/": G_LOGGER.INFO, + os.path.join("backend", "trt"): G_LOGGER.VERBOSE, + }, ), ], ) diff --git a/tools/Polygraphy/tests/tools/args/test_docstrings.py b/tools/Polygraphy/tests/tools/args/test_docstrings.py index 671a6557..c79f5df3 100644 --- a/tools/Polygraphy/tests/tools/args/test_docstrings.py +++ b/tools/Polygraphy/tests/tools/args/test_docstrings.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,7 +24,11 @@ from polygraphy.tools.args.base import BaseArgs -ARG_CLASSES = [cls for cls in args_mod.__dict__.values() if inspect.isclass(cls) and issubclass(cls, BaseArgs)] +ARG_CLASSES = [ + cls + for cls in args_mod.__dict__.values() + if inspect.isclass(cls) and issubclass(cls, BaseArgs) +] USES_DEP_PAT = re.compile(r"self.arg_groups\[(.*?)\]") MEMBER_PAT = re.compile(r"self.(.*?)[ ,.\[}]") @@ -54,7 +58,9 @@ def test_docstrings_document_dependencies(self, arg_group_type): continue documented_deps.add(line.lstrip("-").partition(":")[0].strip()) - assert documented_deps == deps, "Documented dependencies do not match actual dependencies" + assert ( + documented_deps == deps + ), "Documented dependencies do not match actual dependencies" # Checks that all members set by `parse` are documented. # @@ -78,7 +84,11 @@ def should_include_member(member): return True - members = {member for member in MEMBER_PAT.findall(code) if should_include_member(member)} + members = { + member + for member in MEMBER_PAT.findall(code) + if should_include_member(member) + } docstring = arg_group_type.parse_impl.__doc__ if docstring is None: @@ -87,7 +97,9 @@ def should_include_member(member): doc_lines = [line.strip() for line in docstring.splitlines() if line.strip()] attributes_doc_start = doc_lines.index("Attributes:") - assert attributes_doc_start >= 0, "Expected parse_impl docstring to contain an `Attributes:` section." + assert ( + attributes_doc_start >= 0 + ), "Expected parse_impl docstring to contain an `Attributes:` section." 
doc_lines = doc_lines[attributes_doc_start + 1 :] diff --git a/tools/Polygraphy/tests/tools/args/test_model.py b/tools/Polygraphy/tests/tools/args/test_model.py index c32231b7..6b4346df 100644 --- a/tools/Polygraphy/tests/tools/args/test_model.py +++ b/tools/Polygraphy/tests/tools/args/test_model.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,7 +40,15 @@ def test_path(self, group): assert group.model_type.is_onnx() def test_input_shapes(self, group): - group.parse_args(["--input-shapes", "test0:[1,1]", "test1:[10]", "test:2:[25,301]", "test3:[]"]) + group.parse_args( + [ + "--input-shapes", + "test0:[1,1]", + "test1:[10]", + "test:2:[25,301]", + "test3:[]", + ] + ) assert group.input_shapes["test0"].shape == [1, 1] assert group.input_shapes["test1"].shape == [10] @@ -55,17 +63,21 @@ def test_fixed_model_type(self): @pytest.mark.parametrize( "arg, expected_model, expected_extra_info", - [ - ("model.onnx", "model.onnx", None), - ("model.onnx:func", "model.onnx", "func"), - ] - if not "win" in sys.platform - else [ - ("C:\\Users\\model.onnx", "C:\\Users\\model.onnx", None), - ("C:\\Users\\model.onnx:func", "C:\\Users\\model.onnx", "func"), - ], + ( + [ + ("model.onnx", "model.onnx", None), + ("model.onnx:func", "model.onnx", "func"), + ] + if not "win" in sys.platform + else [ + ("C:\\Users\\model.onnx", "C:\\Users\\model.onnx", None), + ("C:\\Users\\model.onnx:func", "C:\\Users\\model.onnx", "func"), + ] + ), ) - def test_model_with_extra_info(self, group, arg, expected_model, expected_extra_info): + def test_model_with_extra_info( + self, group, arg, expected_model, expected_extra_info + ): group.parse_args([arg]) assert group.path == expected_model diff --git 
a/tools/Polygraphy/tests/tools/args/util/test_util.py b/tools/Polygraphy/tests/tools/args/util/test_util.py index c27e683a..a82ac835 100644 --- a/tools/Polygraphy/tests/tools/args/util/test_util.py +++ b/tools/Polygraphy/tests/tools/args/util/test_util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -76,7 +76,9 @@ class TestRunScript: def test_default_args(self): def script_add(script, arg0=0, arg1=0): result_name = safe("result") - script.append_suffix(safe("{:} = {:} + {:}", inline(result_name), arg0, arg1)) + script.append_suffix( + safe("{:} = {:} + {:}", inline(result_name), arg0, arg1) + ) return result_name assert args_util.run_script(script_add) == 0 diff --git a/tools/Polygraphy/tests/tools/conftest.py b/tools/Polygraphy/tests/tools/conftest.py index 508e7155..7d17e327 100644 --- a/tools/Polygraphy/tests/tools/conftest.py +++ b/tools/Polygraphy/tests/tools/conftest.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -45,6 +45,7 @@ def poly_fixture_impl( return poly_fixture + poly = make_poly_fixture([]) poly_run = make_poly_fixture(["run"]) poly_convert = make_poly_fixture(["convert"]) diff --git a/tools/Polygraphy/tests/tools/fake_reduce_checker.py b/tools/Polygraphy/tests/tools/fake_reduce_checker.py index f99085c9..5923bf83 100755 --- a/tools/Polygraphy/tests/tools/fake_reduce_checker.py +++ b/tools/Polygraphy/tests/tools/fake_reduce_checker.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,7 +27,9 @@ def main(): - parser = argparse.ArgumentParser(description="Makes Polygraphy think a node in a model is failing") + parser = argparse.ArgumentParser( + description="Makes Polygraphy think a node in a model is failing" + ) parser.add_argument("model", help="The ONNX model") parser.add_argument( "--fail-node", @@ -37,10 +39,16 @@ def main(): nargs="+", ) parser.add_argument( - "--default-return-code", help="The return code to use when there are no failures. ", default=0, type=int + "--default-return-code", + help="The return code to use when there are no failures. ", + default=0, + type=int, ) parser.add_argument( - "--fail-return-code", help="The return code to use when there is a failure. ", default=1, type=int + "--fail-return-code", + help="The return code to use when there is a failure. 
", + default=1, + type=int, ) args = parser.parse_args() diff --git a/tools/Polygraphy/tests/tools/test_check.py b/tools/Polygraphy/tests/tools/test_check.py index a3b27a85..ea59c023 100644 --- a/tools/Polygraphy/tests/tools/test_check.py +++ b/tools/Polygraphy/tests/tools/test_check.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -76,18 +76,24 @@ def run_lint_get_json(self, poly_check, model_path, *args, expect_error=False): def eval_per_entry(self, lint_entries, lambda_check): return list(map(lambda_check, lint_entries)) - @pytest.mark.parametrize("case", TEST_LINT_CASES["test_summary"], ids=lambda case: case[0]) + @pytest.mark.parametrize( + "case", TEST_LINT_CASES["test_summary"], ids=lambda case: case[0] + ) @pytest.mark.script_launch_mode("subprocess") def test_summary(self, case, poly_check): """ Basic test to check that nodes are correctly classified as passing or failing """ model_name, expected_passing, expected_failing = case - output_json, status = self.run_lint_get_json(poly_check, ONNX_MODELS[model_name].path) + output_json, status = self.run_lint_get_json( + poly_check, ONNX_MODELS[model_name].path + ) passing = sorted(output_json["summary"].get("passing", [])) assert expected_passing == passing # check that the valid nodes are as expected failing = sorted(output_json["summary"].get("failing", [])) - assert expected_failing == failing # check that the invalid nodes are as expected + assert ( + expected_failing == failing + ) # check that the invalid nodes are as expected @pytest.mark.script_launch_mode("subprocess") def test_duplicate_node_names_caught(self, poly_check): @@ -95,7 +101,9 @@ def test_duplicate_node_names_caught(self, poly_check): Test that duplicate 
node names are marked as exception """ output_json, _ = self.run_lint_get_json( - poly_check, ONNX_MODELS["bad_graph_with_duplicate_node_names"].path, expect_error=True + poly_check, + ONNX_MODELS["bad_graph_with_duplicate_node_names"].path, + expect_error=True, ) lint_entry = output_json["lint_entries"][0] @@ -108,13 +116,17 @@ def test_duplicate_node_names_caught(self, poly_check): assert lint_entry == expected_entry assert "identical" in output_json["summary"]["failing"] - @pytest.mark.parametrize("model_name", TEST_LINT_CASES["test_onnx_spec_check"], ids=lambda m: m) + @pytest.mark.parametrize( + "model_name", TEST_LINT_CASES["test_onnx_spec_check"], ids=lambda m: m + ) @pytest.mark.script_launch_mode("subprocess") def test_onnx_spec_check(self, model_name, poly_check): """ Test that basic onnx specification errors are caught by the lint command from the ONNX Checker """ - output_json, _ = self.run_lint_get_json(poly_check, ONNX_MODELS[model_name].path, expect_error=True) + output_json, _ = self.run_lint_get_json( + poly_check, ONNX_MODELS[model_name].path, expect_error=True + ) assert any( # Make sure that there is atleast 1 entry with level exception and source onnx_checker self.eval_per_entry( @@ -152,7 +164,9 @@ def test_onnxrt_parity(self, model_name, poly_check, poly_run): } try: # try to run the model using onnxrt, may fail. 
- status = poly_run([model_path, "--onnxrt", *extra_args_dict.get(model_name, [])]) + status = poly_run( + [model_path, "--onnxrt", *extra_args_dict.get(model_name, [])] + ) poly_run_exception = "FAILED" in status.stdout except Exception as e: poly_run_exception = True @@ -219,7 +233,9 @@ def test_parallel_invalid_nodes_caught(self, poly_check): # Check correct summary assert sorted(expected_valid_nodes) == sorted(output_json["summary"]["passing"]) - assert sorted(expected_invalid_dict.keys()) == sorted(output_json["summary"]["failing"]) + assert sorted(expected_invalid_dict.keys()) == sorted( + output_json["summary"]["failing"] + ) @pytest.mark.parametrize( "input_bool", @@ -270,8 +286,12 @@ def test_data_dependent_errors_caught(self, poly_check, input_bool): } # Check that the output is as expected. - assert sorted(validation_dict[input_bool]["passing"]) == sorted(output_json["summary"]["passing"]) - assert sorted(validation_dict[input_bool]["failing"]) == sorted(output_json["summary"].get("failing", [])) + assert sorted(validation_dict[input_bool]["passing"]) == sorted( + output_json["summary"]["passing"] + ) + assert sorted(validation_dict[input_bool]["failing"]) == sorted( + output_json["summary"].get("failing", []) + ) if validation_dict[input_bool]["failing"]: # when input_bool = False expected_entry = { @@ -296,7 +316,12 @@ def test_custom_op(self, poly_check): expect_error=False, ) condition = ( - lambda entry: any([substr in entry["message"] for substr in Lint.CUSTOM_OP_EXCEPTION_SUBSTRS]) + lambda entry: any( + [ + substr in entry["message"] + for substr in Lint.CUSTOM_OP_EXCEPTION_SUBSTRS + ] + ) and entry["source"] == Lint.Source.ONNXRUNTIME.value and entry["level"] == Lint.Level.WARNING.value ) @@ -323,7 +348,8 @@ def test_multi_level_errors(self, poly_check): # condition for onnx checker entry condition_onnx_checker = ( - lambda entry: "Field 'name' of 'graph' is required to be non-empty." 
in entry["message"] + lambda entry: "Field 'name' of 'graph' is required to be non-empty." + in entry["message"] and entry["source"] == Lint.Source.ONNX_CHECKER.value and entry["level"] == Lint.Level.EXCEPTION.value ) @@ -337,8 +363,12 @@ def test_multi_level_errors(self, poly_check): # checks assert len(lint_entries) >= 2 # there should be atleast two lint entries - assert any(self.eval_per_entry(lint_entries, condition_onnx_checker)) # condition for onnx checker entry - assert any(self.eval_per_entry(lint_entries, condition_onnxruntime)) # condition for onnxruntime entry + assert any( + self.eval_per_entry(lint_entries, condition_onnx_checker) + ) # condition for onnx checker entry + assert any( + self.eval_per_entry(lint_entries, condition_onnxruntime) + ) # condition for onnxruntime entry @pytest.mark.script_launch_mode("subprocess") def test_invalid_model_error(self, poly_check): @@ -359,13 +389,16 @@ def test_invalid_model_error(self, poly_check): # condition for onnx_loader entry for invalid model condition = ( - lambda entry: "Error parsing message with type 'onnx.ModelProto'" in entry["message"] + lambda entry: "Error parsing message with type 'onnx.ModelProto'" + in entry["message"] and entry["source"] == Lint.Source.ONNX_LOADER.value and entry["level"] == Lint.Level.EXCEPTION.value ) assert len(lint_entries) == 1 # there should be only one lint entry - assert condition(lint_entries[0]) # condition for onnx_loader entry for invalid model + assert condition( + lint_entries[0] + ) # condition for onnx_loader entry for invalid model @pytest.mark.script_launch_mode("subprocess") def test_empty_model_warning(self, poly_check): @@ -377,7 +410,9 @@ def test_empty_model_warning(self, poly_check): empty_model_name = "empty" # Test with empty model - output_json, _ = self.run_lint_get_json(poly_check, ONNX_MODELS[empty_model_name].path, expect_error=False) + output_json, _ = self.run_lint_get_json( + poly_check, ONNX_MODELS[empty_model_name].path, 
expect_error=False + ) lint_entries = output_json["lint_entries"] # condition for onnx_loader entry for empty model @@ -411,7 +446,8 @@ def test_cleanable_warning(self, poly_check): and entry["nodes"] == ["G"] ) inp_check = ( - lambda entry: "Input: 'e' does not affect outputs, can be removed" in entry["message"] + lambda entry: "Input: 'e' does not affect outputs, can be removed" + in entry["message"] and entry["source"] == Lint.Source.ONNX_GS.value and entry["level"] == Lint.Level.WARNING.value ) @@ -425,7 +461,9 @@ def test_empty_nodes_renaming(self, poly_check): """ Tests that empty nodes are *gauranteed* a unique name while renaming. """ - output_json, _ = self.run_lint_get_json(poly_check, ONNX_MODELS["renamable"].path, expect_error=False) + output_json, _ = self.run_lint_get_json( + poly_check, ONNX_MODELS["renamable"].path, expect_error=False + ) names = output_json["summary"]["passing"] expected_names = [ "polygraphy_unnamed_node_0_0", diff --git a/tools/Polygraphy/tests/tools/test_convert.py b/tools/Polygraphy/tests/tools/test_convert.py index cebfd240..1d97acb9 100644 --- a/tools/Polygraphy/tests/tools/test_convert.py +++ b/tools/Polygraphy/tests/tools/test_convert.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,13 +31,21 @@ def test_tf2onnx(self, poly_convert): pytest.importorskip("tensorflow") with util.NamedTemporaryFile(suffix=".onnx") as outmodel: - poly_convert([TF_MODELS["identity"].path, "--model-type=frozen", "-o", outmodel.name]) + poly_convert( + [TF_MODELS["identity"].path, "--model-type=frozen", "-o", outmodel.name] + ) assert onnx.load(outmodel.name) def test_fp_to_fp16(self, poly_convert): with util.NamedTemporaryFile() as outmodel: poly_convert( - [ONNX_MODELS["identity_identity"].path, "--convert-to=onnx", "--fp-to-fp16", "-o", outmodel.name] + [ + ONNX_MODELS["identity_identity"].path, + "--convert-to=onnx", + "--fp-to-fp16", + "-o", + outmodel.name, + ] ) # I/O types should be unchanged model = onnx.load(outmodel.name) @@ -57,14 +65,24 @@ def check_engine(self, path): def test_onnx_to_trt(self, poly_convert): with util.NamedTemporaryFile(suffix=".engine") as outmodel: - poly_convert([ONNX_MODELS["identity"].path, "--model-type=onnx", "-o", outmodel.name]) + poly_convert( + [ONNX_MODELS["identity"].path, "--model-type=onnx", "-o", outmodel.name] + ) self.check_engine(outmodel.name) def test_tf_to_onnx_to_trt(self, poly_convert): pytest.importorskip("tensorflow") with util.NamedTemporaryFile() as outmodel: - poly_convert([TF_MODELS["identity"].path, "--model-type=frozen", "--convert-to=trt", "-o", outmodel.name]) + poly_convert( + [ + TF_MODELS["identity"].path, + "--model-type=frozen", + "--convert-to=trt", + "-o", + outmodel.name, + ] + ) self.check_engine(outmodel.name) def test_trt_network_config_script_to_engine(self, poly_convert): @@ -86,7 +104,9 @@ def load_config(config): """ ) - with util.NamedTemporaryFile("w+", suffix=".py") as f, util.NamedTemporaryFile() as outmodel: + with util.NamedTemporaryFile( + "w+", suffix=".py" + ) as f, util.NamedTemporaryFile() as outmodel: f.write(script) f.flush() os.fsync(f.fileno()) @@ -106,7 +126,16 
@@ def load_config(config): def test_modify_onnx_outputs(self, poly_convert): with util.NamedTemporaryFile(suffix=".onnx") as outmodel: - poly_convert([ONNX_MODELS["identity_identity"].path, "-o", outmodel.name, "--onnx-outputs", "mark", "all"]) + poly_convert( + [ + ONNX_MODELS["identity_identity"].path, + "-o", + outmodel.name, + "--onnx-outputs", + "mark", + "all", + ] + ) model = onnx.load(outmodel.name) assert len(model.graph.output) == 2 @@ -114,8 +143,24 @@ def test_modify_onnx_outputs(self, poly_convert): class TestConvertToOnnxLikeTrt: @pytest.mark.parametrize( - "model_name", ["identity", "empty_tensor_expand", "const_foldable", "and", "scan", "dim_param", "tensor_attr"] + "model_name", + [ + "identity", + "empty_tensor_expand", + "const_foldable", + "and", + "scan", + "dim_param", + "tensor_attr", + ], ) def test_onnx_to_trt_to_onnx_like(self, poly_convert, model_name): with util.NamedTemporaryFile() as outmodel: - poly_convert([ONNX_MODELS[model_name].path, "--convert-to=onnx-like-trt-network", "-o", outmodel.name]) + poly_convert( + [ + ONNX_MODELS[model_name].path, + "--convert-to=onnx-like-trt-network", + "-o", + outmodel.name, + ] + ) diff --git a/tools/Polygraphy/tests/tools/test_data.py b/tools/Polygraphy/tests/tools/test_data.py index 09116c72..e31f60dd 100644 --- a/tools/Polygraphy/tests/tools/test_data.py +++ b/tools/Polygraphy/tests/tools/test_data.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,7 +24,14 @@ class TestToInput: def test_merge_inputs_outputs(self, poly_run, poly_data): with util.NamedTemporaryFile() as inps, util.NamedTemporaryFile() as outs, util.NamedTemporaryFile() as merged: poly_run( - [ONNX_MODELS["identity"].path, "--onnxrt", "--save-inputs", inps.name, "--save-outputs", outs.name], + [ + ONNX_MODELS["identity"].path, + "--onnxrt", + "--save-inputs", + inps.name, + "--save-outputs", + outs.name, + ], ) poly_data(["to-input", inps.name, outs.name, "-o", merged.name]) diff --git a/tools/Polygraphy/tests/tools/test_debug.py b/tools/Polygraphy/tests/tools/test_debug.py index d0fb425a..18797861 100644 --- a/tools/Polygraphy/tests/tools/test_debug.py +++ b/tools/Polygraphy/tests/tools/test_debug.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -75,7 +75,16 @@ def check_outdir(subdir): files = glob.glob(os.path.join(outdir, subdir, "*")) assert len(files) == 1 basenames = list(map(os.path.basename, files)) - assert len([f for f in basenames if f.startswith("replay") and f.endswith(".json")]) == 1 + assert ( + len( + [ + f + for f in basenames + if f.startswith("replay") and f.endswith(".json") + ] + ) + == 1 + ) check_outdir("good") check_outdir("bad") @@ -105,7 +114,9 @@ def test_sanity(self, mode, direction, check_status, model, poly_debug): class TestReduce: - FAKE_REDUCE_CHECKER = os.path.join(os.path.dirname(__file__), "fake_reduce_checker.py") + FAKE_REDUCE_CHECKER = os.path.join( + os.path.dirname(__file__), "fake_reduce_checker.py" + ) # Test left branch, right branch, at the point of branching, and after the branch. 
@pytest.mark.parametrize( @@ -315,7 +326,9 @@ def test_no_reduce_required_branches(self, fail_nodes, poly_debug): model = onnx_from_path(os.path.join(outdir, "reduced.onnx")) node_names = [node.name for node in model.graph.node] assert all(fail_node in node_names for fail_node in fail_nodes) - assert len(model.graph.node) <= 3 # The branch on the opposite side of the model should be removed. + assert ( + len(model.graph.node) <= 3 + ) # The branch on the opposite side of the model should be removed. @pytest.mark.parametrize("opts", [[], ["--force-fallback-shape-inference"]]) def test_reduce_shape_inference(self, opts, poly_debug): @@ -374,7 +387,12 @@ def test_reduce_custom_data_multibranch_input(self, negative, poly_debug, poly_r model = ONNX_MODELS["reducable"].path inp_data_path = os.path.join(outdir, "custom_inputs.json") - inputs = [{"X0": np.array([3.14159265], dtype=np.float32), "Y0": np.array([2.7749389])}] + inputs = [ + { + "X0": np.array([3.14159265], dtype=np.float32), + "Y0": np.array([2.7749389]), + } + ] save_json(inputs, inp_data_path) # Generate golden outputs @@ -416,10 +434,13 @@ def test_reduce_custom_data_multibranch_input(self, negative, poly_debug, poly_r # distinct from the input data we specified. # Otherwise, reduce should use the data loader we provided and hence pass assert ("FAILED" in status.stdout + status.stderr) == negative - assert ("Difference exceeds tolerance" in status.stdout + status.stderr) == negative + assert ( + "Difference exceeds tolerance" in status.stdout + status.stderr + ) == negative # Reduce should issue a warning when it detects that the default data loader is in use. 
assert ( - "Please ensure that you have provided a data loader argument directly" in status.stdout + status.stderr + "Please ensure that you have provided a data loader argument directly" + in status.stdout + status.stderr ) == negative @pytest.mark.script_launch_mode("subprocess") @@ -524,7 +545,9 @@ class TestRepeat: ) def test_until(self, until, check, expected_iters, poly_debug): with tempfile.TemporaryDirectory() as outdir: - status = poly_debug(["repeat", "--until", until, "--check", check], cwd=outdir) + status = poly_debug( + ["repeat", "--until", until, "--check", check], cwd=outdir + ) assert f"Finished {expected_iters} iteration(s)" in status.stdout def test_iteration_info(self, poly_debug): @@ -579,5 +602,7 @@ def test_iteration_info(self, poly_debug): ) def test_ignore_fail_code(self, poly_debug, opts, expected_output): with tempfile.TemporaryDirectory() as outdir: - status = poly_debug(["repeat", "--until=5"] + opts + ["--check", "false"], cwd=outdir) + status = poly_debug( + ["repeat", "--until=5"] + opts + ["--check", "false"], cwd=outdir + ) assert expected_output in status.stdout diff --git a/tools/Polygraphy/tests/tools/test_deprecated.py b/tools/Polygraphy/tests/tools/test_deprecated.py index b74bee29..4688fe87 100644 --- a/tools/Polygraphy/tests/tools/test_deprecated.py +++ b/tools/Polygraphy/tests/tools/test_deprecated.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/tools/test_inspect.py b/tools/Polygraphy/tests/tools/test_inspect.py index 953c21b3..cd09c581 100644 --- a/tools/Polygraphy/tests/tools/test_inspect.py +++ b/tools/Polygraphy/tests/tools/test_inspect.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -613,19 +613,21 @@ def test_num_items(self, poly_run, poly_inspect, num_items): TACTIC_REPLAY_CASES = [ [ "pow_scalar", - r""" + ( + r""" [I] Layer: (Unnamed Layer* 0) [Shuffle] Algorithm: (Implementation: 2147483661, Tactic: 0) | Inputs: (TensorInfo(DataType.FLOAT, (), -1, 1),) | Outputs: (TensorInfo(DataType.FLOAT, (1,), -1, 1),) Layer: node_of_z Algorithm: (Implementation: 2147483651, Tactic: 1) | Inputs: (TensorInfo(DataType.FLOAT, (1,), -1, 1), TensorInfo(DataType.FLOAT, (1,), -1, 1)) | Outputs: (TensorInfo(DataType.FLOAT, (1,), -1, 1),) """ - if mod.version(trt.__version__) < mod.version("8.7") - else r""" + if mod.version(trt.__version__) < mod.version("8.7") + else r""" [I] Layer: ONNXTRT_Broadcast Algorithm: (Implementation: 2147483661, Tactic: 0) | Inputs: (TensorInfo(DataType.FLOAT, (), -1, 1),) | Outputs: (TensorInfo(DataType.FLOAT, (1,), -1, 1),) Layer: PWN(node_of_z) Algorithm: (Implementation: 2147483688, Tactic: 1) | Inputs: (TensorInfo(DataType.FLOAT, (1,), -1, 1), TensorInfo(DataType.FLOAT, (1,), -1, 1)) | Outputs: (TensorInfo(DataType.FLOAT, (1,), -1, 1),) - """, + """ + ), ], ] @@ -668,22 +670,24 @@ def test_show_tactics(self, case, poly_run, poly_inspect): "unsupported_subgraph-nodes-2-2.onnx", "unsupported_subgraph-nodes-4-4.onnx", ], - """ + ( + """ [I] ===== Summary ===== 
Operator | Count | Reason | Nodes ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- FAKE! | 2 | In node 0 (importFallbackPluginImporter): UNSUPPORTED_NODE: Assertion failed: creator && "Plugin not found, are the plugin name, version, and namespace correct?" | [[0, 1], [2, 3]] FAKER! | 1 | In node 0 (importFallbackPluginImporter): UNSUPPORTED_NODE: Assertion failed: creator && "Plugin not found, are the plugin name, version, and namespace correct?" | [[4, 5]] """ - if mod.version(trt.__version__) < mod.version("10.0") - else """ + if mod.version(trt.__version__) < mod.version("10.0") + else """ [I] ===== Summary ===== Operator | Count | Reason | Nodes -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- FAKE! | 1 | In node 0 with name: Fake1 and operator: FAKE! (checkFallbackPluginImporter): INVALID_NODE: creator && "Plugin not found, are the plugin name, version, and namespace correct?" | [[0, 1]] FAKE! | 1 | In node 0 with name: Fake2 and operator: FAKE! (checkFallbackPluginImporter): INVALID_NODE: creator && "Plugin not found, are the plugin name, version, and namespace correct?" | [[2, 3]] FAKER! | 1 | In node 0 with name: Fake3 and operator: FAKER! (checkFallbackPluginImporter): INVALID_NODE: creator && "Plugin not found, are the plugin name, version, and namespace correct?" | [[4, 5]] - """, + """ + ), ), ( "identity_identity", diff --git a/tools/Polygraphy/tests/tools/test_plugin.py b/tools/Polygraphy/tests/tools/test_plugin.py index a74f0b17..3215d7d5 100644 --- a/tools/Polygraphy/tests/tools/test_plugin.py +++ b/tools/Polygraphy/tests/tools/test_plugin.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -55,14 +55,14 @@ def test_match_toy(self, poly_plugin_match): num_plugins += 1 assert plugin["name"] == "toyPlugin" assert len(plugin["instances"]) == 1 - assert len(plugin["instances"][0]["inputs"]) == 2 + assert len(plugin["instances"][0]["inputs"]) == 1 assert len(plugin["instances"][0]["outputs"]) == 2 assert plugin["instances"][0]["attributes"]["ToyX"] == 2 assert num_plugins == 1 @pytest.mark.script_launch_mode("subprocess") - def test_match_list_toy(self, poly_plugin_list_plugins): + def test_list_toy(self, poly_plugin_list_plugins): status = poly_plugin_list_plugins( [self.TOY_MODEL_PATH, "--plugin-dir", self.PLUGINS_PATH] ) @@ -103,6 +103,6 @@ def test_replace_toy(self, poly_plugin_replace, poly_plugin_match): assert "n1" in node_names assert not node_names.intersection({"n2", "n3", "n4", "n5", "n6"}) - assert model.graph.node[1].op_type == "toyPlugin" + assert model.graph.node[1].op_type == "CustomToyPlugin" assert model.graph.node[1].attribute[0].name == "ToyX" assert model.graph.node[1].attribute[0].i == 2 diff --git a/tools/Polygraphy/tests/tools/test_polygraphy.py b/tools/Polygraphy/tests/tools/test_polygraphy.py index c3469484..a7257039 100644 --- a/tools/Polygraphy/tests/tools/test_polygraphy.py +++ b/tools/Polygraphy/tests/tools/test_polygraphy.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/tools/test_run.py b/tools/Polygraphy/tests/tools/test_run.py index 0e5e4a69..74bf4bcf 100644 --- a/tools/Polygraphy/tests/tools/test_run.py +++ b/tools/Polygraphy/tests/tools/test_run.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -72,12 +72,23 @@ def test_plugins(self, poly_run): ONNX_MODELS["identity"].path, "--trt", "--plugins", - "nvinfer_plugin.dll" if sys.platform.startswith("win") else "libnvinfer_plugin.so", + ( + "nvinfer_plugin.dll" + if sys.platform.startswith("win") + else "libnvinfer_plugin.so" + ), ] ) def test_custom_outputs(self, poly_run): - poly_run([ONNX_MODELS["identity_identity"].path, "--trt", "--trt-outputs", "identity_out_0"]) + poly_run( + [ + ONNX_MODELS["identity_identity"].path, + "--trt", + "--trt-outputs", + "identity_out_0", + ] + ) def test_layerwise_outputs(self, poly_run): with util.NamedTemporaryFile() as outfile0: @@ -125,10 +136,26 @@ def test_sparse_weights(self, poly_run): poly_run([ONNX_MODELS["identity"].path, "--trt", "--sparse-weights"]) def test_input_shape(self, poly_run): - poly_run([ONNX_MODELS["dynamic_identity"].path, "--trt", "--onnxrt", "--input-shapes", "X:[1,2,4,4]"]) + poly_run( + [ + ONNX_MODELS["dynamic_identity"].path, + "--trt", + "--onnxrt", + "--input-shapes", + "X:[1,2,4,4]", + ] + ) def test_dynamic_input_shape(self, poly_run): - poly_run([ONNX_MODELS["dynamic_identity"].path, "--trt", "--onnxrt", "--input-shapes", "X:[1,2,-1,4]"]) + poly_run( + [ + ONNX_MODELS["dynamic_identity"].path, + "--trt", + "--onnxrt", + "--input-shapes", + "X:[1,2,-1,4]", + ] + ) def 
test_explicit_profile(self, poly_run): poly_run( @@ -212,7 +239,9 @@ def test_multiple_profiles(self, poly_run, optimization_profile): mod.version(trt.__version__) < mod.version("10.0"), reason="Feature not present before 10.0", ) - @pytest.mark.parametrize("allocation_strategy", [None, "static", "profile", "runtime"]) + @pytest.mark.parametrize( + "allocation_strategy", [None, "static", "profile", "runtime"] + ) def test_allocation_strategies(self, poly_run, allocation_strategy): cmd = [ ONNX_MODELS["residual_block"].path, @@ -245,14 +274,28 @@ def test_allocation_strategies(self, poly_run, allocation_strategy): def test_int8_calibration_cache(self, poly_run): with util.NamedTemporaryFile() as outpath: - cmd = [ONNX_MODELS["identity"].path, "--trt", "--int8", "--calibration-cache", outpath.name] + cmd = [ + ONNX_MODELS["identity"].path, + "--trt", + "--int8", + "--calibration-cache", + outpath.name, + ] cmd += ["--onnxrt"] poly_run(cmd) assert is_file_non_empty(outpath.name) - @pytest.mark.parametrize("base_class", ["IInt8LegacyCalibrator", "IInt8EntropyCalibrator2"]) + @pytest.mark.parametrize( + "base_class", ["IInt8LegacyCalibrator", "IInt8EntropyCalibrator2"] + ) def test_int8_calibration_base_class(self, poly_run, base_class): - cmd = [ONNX_MODELS["identity"].path, "--trt", "--int8", "--calibration-base-class", base_class] + cmd = [ + ONNX_MODELS["identity"].path, + "--trt", + "--int8", + "--calibration-base-class", + base_class, + ] cmd += ["--onnxrt"] poly_run() @@ -262,39 +305,89 @@ def test_timing_cache(self, poly_run): total_cache = os.path.join(dir, "total.cache") identity_cache = os.path.join(dir, "identity.cache") - poly_run([ONNX_MODELS["const_foldable"].path, "--trt", "--save-timing-cache", total_cache]) + poly_run( + [ + ONNX_MODELS["const_foldable"].path, + "--trt", + "--save-timing-cache", + total_cache, + ] + ) assert is_file_non_empty(total_cache) const_foldable_cache_size = get_file_size(total_cache) - poly_run([ONNX_MODELS["identity"].path, 
"--trt", "--save-timing-cache", identity_cache]) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--trt", + "--save-timing-cache", + identity_cache, + ] + ) identity_cache_size = get_file_size(identity_cache) - poly_run([ONNX_MODELS["identity"].path, "--trt", "--save-timing-cache", total_cache]) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--trt", + "--save-timing-cache", + total_cache, + ] + ) total_cache_size = get_file_size(total_cache) # The total cache should be larger than either of the individual caches. - assert total_cache_size >= const_foldable_cache_size and total_cache_size >= identity_cache_size + assert ( + total_cache_size >= const_foldable_cache_size + and total_cache_size >= identity_cache_size + ) # The total cache should also be smaller than or equal to the sum of the individual caches since # header information should not be duplicated. assert total_cache_size <= (const_foldable_cache_size + identity_cache_size) def test_save_load_engine(self, poly_run): with util.NamedTemporaryFile() as outpath: - poly_run([ONNX_MODELS["identity"].path, "--trt", "--save-engine", outpath.name]) + poly_run( + [ONNX_MODELS["identity"].path, "--trt", "--save-engine", outpath.name] + ) assert is_file_non_empty(outpath.name) poly_run(["--trt", outpath.name, "--model-type=engine"]) def test_tactic_replay(self, poly_run): with util.NamedTemporaryFile() as tactic_replay: - poly_run([ONNX_MODELS["identity"].path, "--trt", "--save-tactics", tactic_replay.name]) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--trt", + "--save-tactics", + tactic_replay.name, + ] + ) assert is_file_non_empty(tactic_replay.name) - poly_run([ONNX_MODELS["identity"].path, "--trt", "--load-tactics", tactic_replay.name]) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--trt", + "--load-tactics", + tactic_replay.name, + ] + ) def test_tactic_sources(self, poly_run): - poly_run([ONNX_MODELS["identity"].path, "--trt", "--tactic-sources", "CUBLAS", "CUBLAS_LT"]) + poly_run( + [ + 
ONNX_MODELS["identity"].path, + "--trt", + "--tactic-sources", + "CUBLAS", + "CUBLAS_LT", + ] + ) def test_pool_limits(self, poly_run): - poly_run([ONNX_MODELS["identity"].path, "--trt", "--pool-limit", "workspace:32M"]) + poly_run( + [ONNX_MODELS["identity"].path, "--trt", "--pool-limit", "workspace:32M"] + ) def test_data_loader_script_calibration(self, poly_run): with util.NamedTemporaryFile("w+", suffix=".py") as f: @@ -312,7 +405,15 @@ def load_data(): f.flush() os.fsync(f.fileno()) - poly_run([ONNX_MODELS["identity"].path, "--trt", "--int8", "--data-loader-script", f.name]) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--trt", + "--int8", + "--data-loader-script", + f.name, + ] + ) class TestTf: @@ -323,13 +424,29 @@ def test_tf(self, poly_run): def test_tf_save_pb(self, poly_run): pytest.importorskip("tensorflow") with util.NamedTemporaryFile() as outpath: - poly_run([TF_MODELS["identity"].path, "--tf", "--gpu-memory-fraction=0.5", "--save-pb", outpath.name]) + poly_run( + [ + TF_MODELS["identity"].path, + "--tf", + "--gpu-memory-fraction=0.5", + "--save-pb", + outpath.name, + ] + ) assert is_file_non_empty(outpath.name) def test_tf_save_tensorboard(self, poly_run): pytest.importorskip("tensorflow") with tempfile.TemporaryDirectory() as outdir: - poly_run([TF_MODELS["identity"].path, "--tf", "--gpu-memory-fraction=0.5", "--save-tensorboard", outdir]) + poly_run( + [ + TF_MODELS["identity"].path, + "--tf", + "--gpu-memory-fraction=0.5", + "--save-tensorboard", + outdir, + ] + ) files = glob.glob(f"{outdir}{os.path.sep}*") assert len(files) == 1 @@ -337,7 +454,15 @@ def test_tf_save_tensorboard(self, poly_run): def test_tf_save_timeline(self, poly_run): pytest.importorskip("tensorflow") with util.NamedTemporaryFile() as outpath: - poly_run([TF_MODELS["identity"].path, "--tf", "--gpu-memory-fraction=0.5", "--save-timeline", outpath.name]) + poly_run( + [ + TF_MODELS["identity"].path, + "--tf", + "--gpu-memory-fraction=0.5", + "--save-timeline", + 
outpath.name, + ] + ) timelines = glob.glob(os.path.join(outpath.name, "*")) for timeline in timelines: assert is_file_non_empty(timeline) @@ -354,12 +479,21 @@ def test_onnx_rt(self, poly_run): def test_onnx_rt_save_onnx(self, poly_run): with util.NamedTemporaryFile() as outpath: - poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--save-onnx", outpath.name]) + poly_run( + [ONNX_MODELS["identity"].path, "--onnxrt", "--save-onnx", outpath.name] + ) assert is_file_non_empty(outpath.name) assert onnx.load(outpath.name) def test_onnx_rt_custom_outputs(self, poly_run): - poly_run([ONNX_MODELS["identity_identity"].path, "--onnxrt", "--onnx-outputs", "identity_out_0"]) + poly_run( + [ + ONNX_MODELS["identity_identity"].path, + "--onnxrt", + "--onnx-outputs", + "identity_out_0", + ] + ) def test_onnx_rt_layerwise_outputs(self, poly_run): with util.NamedTemporaryFile() as outfile0: @@ -417,11 +551,37 @@ def test_subprocess_sanity(self, poly_run): def test_exit_status_on_fail_comparison(self, poly_run, tmp_path): OUTFILE0 = os.path.join(tmp_path, "outputs0.json") - poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--save-outputs", OUTFILE0, "--seed=1"]) - poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--load-outputs", OUTFILE0, "--seed=2"], expect_error=True) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--onnxrt", + "--save-outputs", + OUTFILE0, + "--seed=1", + ] + ) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--onnxrt", + "--load-outputs", + OUTFILE0, + "--seed=2", + ], + expect_error=True, + ) def test_custom_tolerance(self, poly_run): - poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--onnxrt", "--iterations=0", "--atol=1.0", "--rtol=1.0"]) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--onnxrt", + "--onnxrt", + "--iterations=0", + "--atol=1.0", + "--rtol=1.0", + ] + ) def test_custom_per_output_tolerance(self, poly_run): poly_run( @@ -444,14 +604,38 @@ def test_custom_per_output_tolerance(self, poly_run): ) def 
test_custom_input_ranges(self, poly_run): - poly_run([ONNX_MODELS["identity_identity"].path, "--onnxrt", "--val-range", "X:[1.0,2.0]", "[0.5,1.5]"]) + poly_run( + [ + ONNX_MODELS["identity_identity"].path, + "--onnxrt", + "--val-range", + "X:[1.0,2.0]", + "[0.5,1.5]", + ] + ) def test_index_comparison(self, poly_run): - poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--postprocess", "top-1", "--compare-func=indices"]) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--onnxrt", + "--postprocess", + "top-1", + "--compare-func=indices", + ] + ) @pytest.mark.parametrize("check_error_stat", ["max", "median", "mean", "quantile"]) def test_check_error_stat(self, poly_run, check_error_stat): - poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--onnxrt", "--check-error-stat", check_error_stat]) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--onnxrt", + "--onnxrt", + "--check-error-stat", + check_error_stat, + ] + ) def test_save_load_outputs(self, poly_run, tmp_path): OUTFILE0 = os.path.join(tmp_path, "outputs0.json") @@ -459,7 +643,15 @@ def test_save_load_outputs(self, poly_run, tmp_path): poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--save-outputs", OUTFILE0]) poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--save-outputs", OUTFILE1]) - status = poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--load-outputs", OUTFILE0, OUTFILE1]) + status = poly_run( + [ + ONNX_MODELS["identity"].path, + "--onnxrt", + "--load-outputs", + OUTFILE0, + OUTFILE1, + ] + ) assert ( "Difference is within tolerance" in status.stdout + status.stderr ) # Make sure it actually compared stuff. @@ -471,20 +663,31 @@ def test_save_load_outputs(self, poly_run, tmp_path): ) # Make sure it DIDN'T compare stuff. 
# Should work even with no runners specified - status = poly_run([ONNX_MODELS["identity"].path, "--load-outputs", OUTFILE0, OUTFILE1]) + status = poly_run( + [ONNX_MODELS["identity"].path, "--load-outputs", OUTFILE0, OUTFILE1] + ) assert ( "Difference is within tolerance" in status.stdout + status.stderr ) # Make sure it actually compared stuff. # Should work even when comparing a single runner to itself. - status = poly_run([ONNX_MODELS["identity"].path, "--load-outputs", OUTFILE0, OUTFILE0]) + status = poly_run( + [ONNX_MODELS["identity"].path, "--load-outputs", OUTFILE0, OUTFILE0] + ) assert ( "Difference is within tolerance" in status.stdout + status.stderr ) # Make sure it actually compared stuff. def test_save_load_inputs(self, poly_run): with util.NamedTemporaryFile() as infile0, util.NamedTemporaryFile() as infile1: - poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--save-input-data", infile0.name]) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--onnxrt", + "--save-input-data", + infile0.name, + ] + ) poly_run( [ ONNX_MODELS["identity"].path, @@ -495,14 +698,30 @@ def test_save_load_inputs(self, poly_run): infile1.name, ] ) # Copy - poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--load-input-data", infile0.name, infile1.name]) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--onnxrt", + "--load-input-data", + infile0.name, + infile1.name, + ] + ) def test_load_torch_inputs(self, poly_run): with util.NamedTemporaryFile() as infile: inp = torch.ones((1, 1, 2, 2), dtype=torch.float32) feed_dict = [{"x": inp}] save_json(feed_dict, infile.name) - poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--onnxrt", "--load-inputs", infile.name]) + poly_run( + [ + ONNX_MODELS["identity"].path, + "--onnxrt", + "--onnxrt", + "--load-inputs", + infile.name, + ] + ) def test_runner_coexistence(self, poly_run): poly_run([ONNX_MODELS["identity"].path, "--onnxrt", "--trt"]) @@ -514,7 +733,15 @@ def test_tf2onnxrt(self, poly_run): def 
test_tf2onnx_save_onnx(self, poly_run): pytest.importorskip("tensorflow") with util.NamedTemporaryFile() as outpath: - poly_run([TF_MODELS["identity"].path, "--onnxrt", "--model-type=frozen", "--save-onnx", outpath.name]) + poly_run( + [ + TF_MODELS["identity"].path, + "--onnxrt", + "--model-type=frozen", + "--save-onnx", + outpath.name, + ] + ) assert is_file_non_empty(outpath.name) assert onnx.load(outpath.name) diff --git a/tools/Polygraphy/tests/tools/test_script.py b/tools/Polygraphy/tests/tools/test_script.py index 4dabb64a..acfebb2c 100644 --- a/tools/Polygraphy/tests/tools/test_script.py +++ b/tools/Polygraphy/tests/tools/test_script.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,7 +17,12 @@ import pytest from polygraphy.exception import PolygraphyInternalException -from polygraphy.tools.script import Script, inline, make_invocable, make_invocable_if_nondefault +from polygraphy.tools.script import ( + Script, + inline, + make_invocable, + make_invocable_if_nondefault, +) def make_test_string(): @@ -38,7 +43,9 @@ class TestScript: ) def test_add_funcs_fail_on_unsafe(self, func): script = Script() - with pytest.raises(PolygraphyInternalException, match="was not checked for safety"): + with pytest.raises( + PolygraphyInternalException, match="was not checked for safety" + ): func(script) @pytest.mark.parametrize( diff --git a/tools/Polygraphy/tests/tools/test_surgeon.py b/tools/Polygraphy/tests/tools/test_surgeon.py index 88a4ab61..bbb594a4 100644 --- a/tools/Polygraphy/tests/tools/test_surgeon.py +++ b/tools/Polygraphy/tests/tools/test_surgeon.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -35,6 +35,7 @@ def onnx_model_sanity_check_impl(model_path): return onnx_model_sanity_check_impl + def get_exclude_list(exclude_list): if not exclude_list: return set() @@ -42,29 +43,34 @@ def get_exclude_list(exclude_list): lines = [line.rstrip() for line in fp] return set(lines) + def pruned_initializer_sanity_check(opath, is_sparse=False, exclude_list=None): exclude_list = get_exclude_list(exclude_list) # we only prune the input data of QuantizeLinear and leave the scale and zero_point untouched - if 'qdq' in opath: - exclude_list.add('y_scale') - exclude_list.add('y_zero_point') + if "qdq" in opath: + exclude_list.add("y_scale") + exclude_list.add("y_zero_point") model = onnx.load(opath) for initializer in model.graph.initializer: # If initializer is to be left un-stripped if initializer.name in exclude_list: # ensure initializer is non-empty and doc_string doesn't contain the weightless flag - shape_match = list(numpy_helper.to_array(initializer).shape) == initializer.dims + shape_match = ( + list(numpy_helper.to_array(initializer).shape) == initializer.dims + ) if "TRT_WEIGHTLESS" in initializer.doc_string or not shape_match: return False continue # ensure initializer is empty and doc_string is in required format init_empty = initializer.raw_data == b"" - trt_weightless, sparsity = initializer.doc_string.split('/') + trt_weightless, sparsity = initializer.doc_string.split("/") trt_weightless_correctness = trt_weightless == "TRT_WEIGHTLESS" sparsity_correctness = False - if (not is_sparse and sparsity == "") or (is_sparse and sparsity == "SPARSE_2_4"): + if (not is_sparse and sparsity == "") or ( + is_sparse and sparsity == "SPARSE_2_4" + ): sparsity_correctness = True if not (init_empty and trt_weightless_correctness and sparsity_correctness): @@ 
-72,6 +78,7 @@ def pruned_initializer_sanity_check(opath, is_sparse=False, exclude_list=None): return True + def get_initializers_to_sparsify(ipath): model = onnx.load(ipath) initializers_to_sparsify = set() @@ -81,6 +88,7 @@ def get_initializers_to_sparsify(ipath): return initializers_to_sparsify + def reconstructed_initializer_sanity_check(opath, initializers_to_sparsify): model = onnx.load(opath) sparsity_checker = SparsityPruner(model) @@ -92,13 +100,19 @@ def reconstructed_initializer_sanity_check(opath, initializers_to_sparsify): return False # ensure sparsity of initializers is retained - if initializer.name in initializers_to_sparsify and initializer.name not in sparse_tensors: + if ( + initializer.name in initializers_to_sparsify + and initializer.name not in sparse_tensors + ): return False return True + def was_shape_inference_run(status, model): - logging_correct = "Shape inference completed successfully" in (status.stdout + status.stderr) + logging_correct = "Shape inference completed successfully" in ( + status.stdout + status.stderr + ) has_shape = True model = onnx.load(model) @@ -109,15 +123,25 @@ def was_shape_inference_run(status, model): class TestSurgeonExtract: - def test_no_shape_inference_if_has_metadata(self, poly_surgeon_extract, onnx_model_sanity_check): + def test_no_shape_inference_if_has_metadata( + self, poly_surgeon_extract, onnx_model_sanity_check + ): with util.NamedTemporaryFile() as outmodel: status = poly_surgeon_extract( - [ONNX_MODELS["identity_identity"].path, "-o", outmodel.name, "--inputs", "X:auto:auto"] + [ + ONNX_MODELS["identity_identity"].path, + "-o", + outmodel.name, + "--inputs", + "X:auto:auto", + ] ) onnx_model_sanity_check(outmodel.name) assert not was_shape_inference_run(status, outmodel.name) - def test_onnx_shape_inference_if_no_metadata(self, poly_surgeon_extract, onnx_model_sanity_check): + def test_onnx_shape_inference_if_no_metadata( + self, poly_surgeon_extract, onnx_model_sanity_check + ): with 
util.NamedTemporaryFile() as outmodel: status = poly_surgeon_extract( [ @@ -131,7 +155,9 @@ def test_onnx_shape_inference_if_no_metadata(self, poly_surgeon_extract, onnx_mo onnx_model_sanity_check(outmodel.name) assert was_shape_inference_run(status, outmodel.name) - def test_fallback_shape_inference_no_onnx_shape_inference(self, poly_surgeon_extract, onnx_model_sanity_check): + def test_fallback_shape_inference_no_onnx_shape_inference( + self, poly_surgeon_extract, onnx_model_sanity_check + ): with util.NamedTemporaryFile() as outmodel: status = poly_surgeon_extract( [ @@ -177,7 +203,13 @@ def test_sanity_dim_param(self, poly_surgeon_extract, onnx_model_sanity_check): class TestSurgeonInsert: - def check_insert_model(self, path, expected_node_ops, expected_graph_input_names, expected_graph_output_names): + def check_insert_model( + self, + path, + expected_node_ops, + expected_graph_input_names, + expected_graph_output_names, + ): model = onnx.load(path) assert [node.op_type for node in model.graph.node] == expected_node_ops @@ -203,7 +235,12 @@ def test_insert_at_tensor(self, poly_surgeon): "--op=FakeOp", ] ) - self.check_insert_model(outmodel.name, ["Identity", "FakeOp", "Identity"], ["X"], ["identity_out_2"]) + self.check_insert_model( + outmodel.name, + ["Identity", "FakeOp", "Identity"], + ["X"], + ["identity_out_2"], + ) def test_graph_output(self, poly_surgeon): # FakeOp output tensor should be marked as a graph output. 
Name should be preserved - identity_out_2 @@ -219,7 +256,12 @@ def test_graph_output(self, poly_surgeon): "--op=FakeOp", ] ) - self.check_insert_model(outmodel.name, ["Identity", "Identity", "FakeOp"], ["X"], ["identity_out_2"]) + self.check_insert_model( + outmodel.name, + ["Identity", "Identity", "FakeOp"], + ["X"], + ["identity_out_2"], + ) def test_at_graph_input(self, poly_surgeon): with util.NamedTemporaryFile() as outmodel: @@ -234,7 +276,12 @@ def test_at_graph_input(self, poly_surgeon): "--op=FakeOp", ] ) - self.check_insert_model(outmodel.name, ["FakeOp", "Identity", "Identity"], ["X"], ["identity_out_2"]) + self.check_insert_model( + outmodel.name, + ["FakeOp", "Identity", "Identity"], + ["X"], + ["identity_out_2"], + ) # When a specified input tensor is used by multiple other nodes, it should not be # disconnected from other nodes. @@ -309,7 +356,10 @@ def test_with_attributes(self, poly_surgeon): ] ) model = self.check_insert_model( - outmodel.name, ["FakeOp", "Identity", "Identity"], ["X"], ["identity_out_2"] + outmodel.name, + ["FakeOp", "Identity", "Identity"], + ["X"], + ["identity_out_2"], ) node = model.graph.node[0] @@ -337,10 +387,14 @@ def test_with_attributes(self, poly_surgeon): class TestSurgeonSanitize: - @pytest.mark.parametrize("no_per_pass_shape_inf", [None, "--no-per-pass-shape-inference"]) + @pytest.mark.parametrize( + "no_per_pass_shape_inf", [None, "--no-per-pass-shape-inference"] + ) @pytest.mark.parametrize("fold_shapes", [None, "--no-fold-shapes"]) @pytest.mark.parametrize("partitioning", [None, "basic", "recursive"]) - @pytest.mark.parametrize("no_onnxruntime_shape_inference", [None, "--no-onnxruntime-shape-inference"]) + @pytest.mark.parametrize( + "no_onnxruntime_shape_inference", [None, "--no-onnxruntime-shape-inference"] + ) def test_fold_constants( self, poly_surgeon, @@ -351,7 +405,14 @@ def test_fold_constants( no_onnxruntime_shape_inference, ): with util.NamedTemporaryFile() as outmodel: - cmd = ["sanitize", 
ONNX_MODELS["const_foldable"].path, "-o", outmodel.name, "--fold-constants", "-v"] + cmd = [ + "sanitize", + ONNX_MODELS["const_foldable"].path, + "-o", + outmodel.name, + "--fold-constants", + "-v", + ] if fold_shapes: cmd += [fold_shapes] if partitioning: @@ -362,9 +423,10 @@ def test_fold_constants( cmd += [no_onnxruntime_shape_inference] status = poly_surgeon(cmd) - assert ("Inferred shapes in the model with `onnxruntime.tools.symbolic_shape_infer`" in status.stdout) == ( - no_onnxruntime_shape_inference is None - ) + assert ( + "Inferred shapes in the model with `onnxruntime.tools.symbolic_shape_infer`" + in status.stdout + ) == (no_onnxruntime_shape_inference is None) onnx_model_sanity_check(outmodel.name) model = onnx.load(outmodel.name) @@ -372,7 +434,13 @@ def test_fold_constants( @pytest.mark.parametrize("global_upper_bound", [None, "2000"]) @pytest.mark.parametrize("specified_upper_bound", [None, "cast_out_6:4000"]) - def test_set_upper_bound(self, poly_surgeon, global_upper_bound, specified_upper_bound, onnx_model_sanity_check): + def test_set_upper_bound( + self, + poly_surgeon, + global_upper_bound, + specified_upper_bound, + onnx_model_sanity_check, + ): with util.NamedTemporaryFile() as outmodel: cmd = [ "sanitize", @@ -474,8 +542,12 @@ def test_override_shapes_partial_inputs(self, poly_surgeon): ] ) model = onnx.load(outmodel.name) - assert model.graph.input[0].type.tensor_type.shape.dim[2].dim_param == "height" - assert model.graph.input[0].type.tensor_type.shape.dim[3].dim_param == "width" + assert ( + model.graph.input[0].type.tensor_type.shape.dim[2].dim_param == "height" + ) + assert ( + model.graph.input[0].type.tensor_type.shape.dim[3].dim_param == "width" + ) def test_override_shapes_no_reorder(self, poly_surgeon): with util.NamedTemporaryFile() as outmodel: @@ -497,7 +569,15 @@ def test_override_shapes_no_reorder(self, poly_surgeon): def test_modify_onnx_outputs(self, poly_surgeon): with util.NamedTemporaryFile(suffix=".onnx") as outmodel: 
poly_surgeon( - ["sanitize", ONNX_MODELS["identity_identity"].path, "-o", outmodel.name, "--outputs", "mark", "all"] + [ + "sanitize", + ONNX_MODELS["identity_identity"].path, + "-o", + outmodel.name, + "--outputs", + "mark", + "all", + ] ) model = onnx.load(outmodel.name) @@ -561,7 +641,9 @@ def test_external_data(self, poly_surgeon, poly_run): assert is_file_non_empty(os.path.join(outdir, outdata)) assert poly_run([outmodel, "--onnxrt", "--external-data-dir", outdir]) - def test_force_fallback_shape_inference_will_override_model_shapes(self, poly_surgeon, onnx_model_sanity_check): + def test_force_fallback_shape_inference_will_override_model_shapes( + self, poly_surgeon, onnx_model_sanity_check + ): with util.NamedTemporaryFile() as outmodel: status = poly_surgeon( [ @@ -589,7 +671,9 @@ def test_force_fallback_shape_inference_will_override_model_shapes(self, poly_su ("9.99M", False), ], ) - def test_size_threshold(self, poly_surgeon, size_threshold, expect_folding, onnx_model_sanity_check): + def test_size_threshold( + self, poly_surgeon, size_threshold, expect_folding, onnx_model_sanity_check + ): with util.NamedTemporaryFile() as outmodel: poly_surgeon( [ @@ -614,7 +698,10 @@ def test_size_threshold(self, poly_surgeon, size_threshold, expect_folding, onnx class TestSurgeonPrune: - @pytest.mark.parametrize("model_name", ["matmul", "matmul.fp16", "matmul.bf16", "matmul.bf16.i32data", "conv"]) + @pytest.mark.parametrize( + "model_name", + ["matmul", "matmul.fp16", "matmul.bf16", "matmul.bf16.i32data", "conv"], + ) def test_prune(self, poly_surgeon, onnx_model_sanity_check, model_name): with tempfile.TemporaryDirectory() as outdir: ipath = ONNX_MODELS[model_name].path @@ -624,9 +711,21 @@ def test_prune(self, poly_surgeon, onnx_model_sanity_check, model_name): if "bf16" not in ipath: onnx_model_sanity_check(opath) + class TestSurgeonWeightStrip: - @pytest.mark.parametrize("model_name", ["matmul", "matmul.fp16", "matmul.bf16", "conv", "sparse.matmul", "sparse.conv", 
- "transpose_matmul", "qdq_conv"]) + @pytest.mark.parametrize( + "model_name", + [ + "matmul", + "matmul.fp16", + "matmul.bf16", + "conv", + "sparse.matmul", + "sparse.conv", + "transpose_matmul", + "qdq_conv", + ], + ) def test_weight_strip(self, poly_surgeon, model_name): with tempfile.TemporaryDirectory() as outdir: ipath = ONNX_MODELS[model_name].path @@ -638,25 +737,45 @@ def test_weight_strip(self, poly_surgeon, model_name): assert pruned_initializer_sanity_check(opath, is_sparse=is_sparse) @pytest.mark.parametrize( - "model_name, exclude_list", [ - ["matmul", "matmul.exclude_list.txt"], - ["sparse.conv", "sparse.conv.exclude_list.txt"], - ["qdq_conv", "qdq_conv.exclude_list.txt"]]) + "model_name, exclude_list", + [ + ["matmul", "matmul.exclude_list.txt"], + ["sparse.conv", "sparse.conv.exclude_list.txt"], + ["qdq_conv", "qdq_conv.exclude_list.txt"], + ], + ) def test_weight_strip_exclude_file(self, poly_surgeon, model_name, exclude_list): with tempfile.TemporaryDirectory() as outdir: ipath = ONNX_MODELS[model_name].path exclude_list = model_path(exclude_list) opath = os.path.join(outdir, "weightless_sparse." 
+ os.path.basename(ipath)) - status = poly_surgeon(["weight-strip", ipath, "-o", opath, "--exclude-list", exclude_list]) + status = poly_surgeon( + ["weight-strip", ipath, "-o", opath, "--exclude-list", exclude_list] + ) assert status is_sparse = "sparse" in ipath - assert pruned_initializer_sanity_check(opath, is_sparse=is_sparse, exclude_list=exclude_list) + assert pruned_initializer_sanity_check( + opath, is_sparse=is_sparse, exclude_list=exclude_list + ) + class TestSurgeonWeightReconstruct: - @pytest.mark.parametrize("model_name", ["weightless.matmul.fp16", "weightless.matmul.bf16", "weightless.conv", "weightless.sparse.matmul", - "weightless.sparse.conv", "weightless.transpose_matmul", "weightless.qdq_conv"]) - def test_weight_reconstruct(self, poly_surgeon, onnx_model_sanity_check, model_name): + @pytest.mark.parametrize( + "model_name", + [ + "weightless.matmul.fp16", + "weightless.matmul.bf16", + "weightless.conv", + "weightless.sparse.matmul", + "weightless.sparse.conv", + "weightless.transpose_matmul", + "weightless.qdq_conv", + ], + ) + def test_weight_reconstruct( + self, poly_surgeon, onnx_model_sanity_check, model_name + ): with tempfile.TemporaryDirectory() as outdir: ipath = ONNX_MODELS[model_name].path opath = os.path.join(outdir, "reconstruct." 
+ os.path.basename(ipath)) @@ -666,4 +785,6 @@ def test_weight_reconstruct(self, poly_surgeon, onnx_model_sanity_check, model_n onnx_model_sanity_check(opath) initializers_to_sparsify = get_initializers_to_sparsify(ipath) - assert reconstructed_initializer_sanity_check(opath, initializers_to_sparsify) \ No newline at end of file + assert reconstructed_initializer_sanity_check( + opath, initializers_to_sparsify + ) diff --git a/tools/Polygraphy/tests/tools/test_template.py b/tools/Polygraphy/tests/tools/test_template.py index 7f6ba23b..e4ad9dec 100644 --- a/tools/Polygraphy/tests/tools/test_template.py +++ b/tools/Polygraphy/tests/tools/test_template.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,7 +40,9 @@ def test_no_model_file(self, poly_template): def test_with_model_file(self, poly_template): with util.NamedTemporaryFile("w+", suffix=".py") as template: - poly_template(["trt-network", ONNX_MODELS["identity"].path, "-o", template.name]) + poly_template( + ["trt-network", ONNX_MODELS["identity"].path, "-o", template.name] + ) load_network = InvokeFromScript(template.name, "load_network") builder, network, parser = load_network() diff --git a/tools/Polygraphy/tests/util/test_array.py b/tools/Polygraphy/tests/util/test_array.py index 8450be68..eaea1ef8 100644 --- a/tools/Polygraphy/tests/util/test_array.py +++ b/tools/Polygraphy/tests/util/test_array.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -239,10 +239,16 @@ def test_binary_funcs(self, obj, np_arr, func, np_func): @pytest.mark.parametrize( "func, np_func, types", [ - (util.array.where, np.where, tuple(map(DataType.from_dtype, (np.bool8, np.float32, np.float32)))), + ( + util.array.where, + np.where, + tuple(map(DataType.from_dtype, (np.bool8, np.float32, np.float32))), + ), ], ) def test_ternary_funcs(self, obj, np_arr, func, np_func, types): - build_inputs = lambda input: map(lambda pair: util.array.cast(input + pair[0], pair[1]), enumerate(types)) + build_inputs = lambda input: map( + lambda pair: util.array.cast(input + pair[0], pair[1]), enumerate(types) + ) obj = func(*build_inputs(obj)) assert util.array.equal(obj, np.array(np_func(*build_inputs(np_arr)))) diff --git a/tools/Polygraphy/tests/util/test_serde.py b/tools/Polygraphy/tests/util/test_serde.py index 19dcc8bf..89d6d198 100644 --- a/tools/Polygraphy/tests/util/test_serde.py +++ b/tools/Polygraphy/tests/util/test_serde.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/Polygraphy/tests/util/test_util.py b/tools/Polygraphy/tests/util/test_util.py index 6c2f302c..02943c12 100644 --- a/tools/Polygraphy/tests/util/test_util.py +++ b/tools/Polygraphy/tests/util/test_util.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -49,12 +49,23 @@ def __init__(self, name, seq, index, expected): FIND_STR_IN_ITERABLE_CASES = [ # Case insensitve, plus function should return element from sequence, not name. - FindStrInIterableCase("Softmax:0", seq=["Softmax:0"], index=None, expected="Softmax:0"), - FindStrInIterableCase("Softmax:0", seq=["softmax:0"], index=None, expected="softmax:0"), + FindStrInIterableCase( + "Softmax:0", seq=["Softmax:0"], index=None, expected="Softmax:0" + ), + FindStrInIterableCase( + "Softmax:0", seq=["softmax:0"], index=None, expected="softmax:0" + ), # Exact matches should take priority - FindStrInIterableCase("exact_name", seq=["exact_name_plus", "exact_name"], index=0, expected="exact_name"), + FindStrInIterableCase( + "exact_name", + seq=["exact_name_plus", "exact_name"], + index=0, + expected="exact_name", + ), # Index should come into play when no matches are found - FindStrInIterableCase("non-existent", seq=["test", "test2"], index=1, expected="test2"), + FindStrInIterableCase( + "non-existent", seq=["test", "test2"], index=1, expected="test2" + ), ] @@ -72,7 +83,10 @@ def test_find_str_in_iterable(case): @pytest.mark.parametrize("case", SHAPE_OVERRIDE_CASES) def test_is_valid_shape_override(case): override, shape, expected = case - assert util.is_valid_shape_override(new_shape=override, original_shape=shape) == expected + assert ( + util.is_valid_shape_override(new_shape=override, original_shape=shape) + == expected + ) def arange(shape): @@ -88,8 +102,16 @@ def arange(shape): ), # Permutation should make no difference as other dimensions are 1s (arange((3, 3)), (1, 1, 3, 3), arange((1, 1, 3, 3))), # Unsqueeze where needed (arange((3, 3)), (-1, 3), arange((3, 3))), # Infer dynamic - (arange((3 * 2 * 2,)), (None, 3, 2, 2), arange((1, 3, 2, 2))), # Reshape with inferred dimension - (arange((1, 3, 2, 2)), (None, 2, 2, 3), np.transpose(arange((1, 3, 2, 2)), 
[0, 2, 3, 1])), # Permute + ( + arange((3 * 2 * 2,)), + (None, 3, 2, 2), + arange((1, 3, 2, 2)), + ), # Reshape with inferred dimension + ( + arange((1, 3, 2, 2)), + (None, 2, 2, 3), + np.transpose(arange((1, 3, 2, 2)), [0, 2, 3, 1]), + ), # Permute ] build_torch = lambda a, **kwargs: util.array.to_torch(np.array(a, **kwargs)) @@ -132,7 +154,12 @@ def test_unique_list(case): def test_find_in_dirs(): with tempfile.TemporaryDirectory() as topdir: - dirs = list(map(lambda x: os.path.join(topdir, x), ["test0", "test1", "test2", "test3", "test4"])) + dirs = list( + map( + lambda x: os.path.join(topdir, x), + ["test0", "test1", "test2", "test3", "test4"], + ) + ) for subdir in dirs: os.makedirs(subdir) @@ -171,7 +198,10 @@ def write_to_file(path, content): outfile = util.NamedTemporaryFile() processes = [ - Process(target=write_to_file, args=(outfile.name, f"{proc} - writing line\n" * NUM_LINES)) + Process( + target=write_to_file, + args=(outfile.name, f"{proc} - writing line\n" * NUM_LINES), + ) for proc in range(NUM_PROCESSES) ] @@ -194,7 +224,10 @@ def write_to_file(path, content): for idx in range(NUM_PROCESSES): offset = idx * NUM_LINES expected_prefix = lines[offset].partition("-")[0].strip() - assert all(line.startswith(expected_prefix) for line in lines[offset : offset + NUM_LINES]) + assert all( + line.startswith(expected_prefix) + for line in lines[offset : offset + NUM_LINES] + ) # Make sure the lock file is written to the correct path and not removed automatically. 
assert os.path.exists(outfile.name + ".lock") @@ -205,20 +238,32 @@ def test_basic(self): assert util.make_repr("Example", 1, x=2) == ("Example(1, x=2)", False, False) def test_default_args(self): - assert util.make_repr("Example", None, None, x=2) == ("Example(None, None, x=2)", True, False) + assert util.make_repr("Example", None, None, x=2) == ( + "Example(None, None, x=2)", + True, + False, + ) def test_empty_args_are_default(self): assert util.make_repr("Example", x=2) == ("Example(x=2)", True, False) def test_default_kwargs(self): - assert util.make_repr("Example", 1, 2, x=None, y=None) == ("Example(1, 2)", False, True) + assert util.make_repr("Example", 1, 2, x=None, y=None) == ( + "Example(1, 2)", + False, + True, + ) def test_empty_kwargs_are_default(self): assert util.make_repr("Example", 1, 2) == ("Example(1, 2)", False, True) def test_does_not_modify(self): obj = {"x": float("inf")} - assert util.make_repr("Example", obj) == ("Example({'x': float('inf')})", False, True) + assert util.make_repr("Example", obj) == ( + "Example({'x': float('inf')})", + False, + True, + ) assert obj == {"x": float("inf")} @pytest.mark.parametrize("obj", [float("nan"), float("inf"), float("-inf")]) diff --git a/tools/experimental/trt-engine-explorer/trex/graphing.py b/tools/experimental/trt-engine-explorer/trex/graphing.py index c378fccc..9f1fc7e4 100644 --- a/tools/experimental/trt-engine-explorer/trex/graphing.py +++ b/tools/experimental/trt-engine-explorer/trex/graphing.py @@ -735,6 +735,7 @@ def __add_dot_layer_nodes(self, plan, plan_graph, node_name_2_node_id): for layer_node in plan_graph.layer_nodes: layer = layer_node.layer latency = _get_latency(plan, layer, self.latency_type) + if not layer.type == 'Constant' or plan_graph.include_constants: dot_id = _get_dot_id(layer.name) node_name_2_node_id[layer.name] = dot_id diff --git a/tools/onnx-graphsurgeon/CHANGELOG.md b/tools/onnx-graphsurgeon/CHANGELOG.md index 3d0be18f..7f555d53 100644 --- 
a/tools/onnx-graphsurgeon/CHANGELOG.md +++ b/tools/onnx-graphsurgeon/CHANGELOG.md @@ -2,6 +2,10 @@ Dates are in YYYY-MM-DD format. +## v0.5.2 (2024-04-01) +### Added +- Added `export_dtype` field to `gs.Constant` to allow numpy-unsupported dtypes such as BFloat16. + ## v0.5.1 (2024-02-23) ### Changed diff --git a/tools/onnx-graphsurgeon/examples/12_using_bf16/README.md b/tools/onnx-graphsurgeon/examples/12_using_bf16/README.md new file mode 100644 index 00000000..3b8f2571 --- /dev/null +++ b/tools/onnx-graphsurgeon/examples/12_using_bf16/README.md @@ -0,0 +1,26 @@ +# BFloat16 + +## Introduction + +This example generates a model with bf16 weights. + +Numpy currently doesn't support bf16 natively so data values are stored as float32 and the conversion happens prior to onnx export. +```python +tensor = gs.Constant(name="weight", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32), export_dtype=onnx.TensorProto.BFLOAT16) +# or +tensor = gs.Constant(name="weight", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32)) +tensor.export_dtype = onnx.TensorProto.BFLOAT16 + +``` + +## Running the example + +1. Generate the model: + ```bash + python3 generate.py + ``` + + This creates a model with bfloat16 weights + + ![../resources/12_bf16.onnx.png](../resources/12_bf16.onnx.png) + diff --git a/demo/Tacotron2/loss_functions.py b/tools/onnx-graphsurgeon/examples/12_using_bf16/generate.py similarity index 52% rename from demo/Tacotron2/loss_functions.py rename to tools/onnx-graphsurgeon/examples/12_using_bf16/generate.py index 7ee1a5b2..ebfe67a1 100644 --- a/demo/Tacotron2/loss_functions.py +++ b/tools/onnx-graphsurgeon/examples/12_using_bf16/generate.py @@ -1,5 +1,6 @@ +#!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,20 +16,18 @@ # limitations under the License. # -import torch -import torch.nn as nn -from tacotron2.loss_function import Tacotron2Loss -from waveglow.loss_function import WaveGlowLoss +import onnx_graphsurgeon as gs +import numpy as np +import onnx +BF16 = onnx.TensorProto.BFLOAT16 -def get_loss_function(loss_function, sigma=1.0): - if loss_function == 'Tacotron2': - loss = Tacotron2Loss() - elif loss_function == 'WaveGlow': - loss = WaveGlowLoss(sigma=sigma) - else: - raise NotImplementedError( - "unknown loss function requested: {}".format(loss_function)) +X = gs.Variable(name="X", dtype=BF16, shape=(1, 3, 224, 224)) +W = gs.Constant(name="W", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32) * 0.5, export_dtype=BF16) +Y = gs.Variable(name="Y", dtype=BF16, shape=(1, 5, 222, 222)) - loss.cuda() - return loss +node = gs.Node(op="Conv", inputs=[X, W], outputs=[Y]) + +graph = gs.Graph(nodes=[node], inputs=[X], outputs=[Y]) + +onnx.save(gs.export_onnx(graph), "test_conv_bf16.onnx") diff --git a/tools/onnx-graphsurgeon/examples/resources/12_bf16.onnx.png b/tools/onnx-graphsurgeon/examples/resources/12_bf16.onnx.png new file mode 100644 index 0000000000000000000000000000000000000000..8f2997ec65bbc6829d1bd285d245ee8a9a8e4a77 GIT binary patch literal 36504 zcmdSAWl&sEv@VEiaCf)h?hxE9xVyVMjRkjtYl6GGd+-3k-2;ugGfnQ?A8+Q>yLD$? 
zP1WqGF6uP1wKU|>ix(&Asiz#wKppAtA2&^Hy|%9lWYpj<^| z)ZjqP56&zC^o-{wq2;FPXyN8*>|zdP>ELK@&gg3DVs7r>YUSv54%s6F>csfbNzBFE z*v;C}fmF@f-W*KL-JFz#lT^Xjg_Mnjjf0ehm5-B`kCTN|S%FkpRgKpccmW1R3ML~i zqUM!-vJTLjzg>U79O<1FnHCX*y(YYtMw9$XCiAnXa*)d$(L=vp-}#ovOG^mq@ zuO2|yJd*Qiplk2Ib*F#M{iCdn&2bMZ8n4{RdPhr(94Z~{^*d1N-G`x(IfQB|I;CO znQ!W(FznA=`%U}p#Uh9aon?3F{tfyo&4AB=ORGpBWAH_COxb@-*w6>|c*A0@S;_hl zXC(BfK$IhI*B5|DMRInyB{TdfJ~Gm~(Q+Icjn^F(negu^y14kB-0}5m-i5U*AdHXV z2xB9@6tMu#whq;QK1C}0FNz-x(hL~QSu-};V|~3idw>ghTnO>lyNaxR?LhU?rF?ro z_+*ovPZObGiK=~CoaI$(;$S=ZkG9<7t4TY*0(3*$l3kNz89cOD!LhJO6C)VN2M^6_ z18cf@q#VS0G++PEX()fowUSqMn!bB7L6J^~yl9R%wT(wI&h|d6QPytC-QF>N2W%A# z*c*4yW_%yCIzi@d7Bb>`ygc=ZpKbYk$>Hg)PcUQpH6y_dz8V~@@xjI$AcsaUE+`d? zh^<#Joak4FI08l>NrKTZBW3T-TV&n@#V5cNH%`MI&^j|eWL3E)Jp&s=v8%t+kG5>w zNwoi6!wq*DZ94S(wzvnc4uRn#AD2I&fXn*pdEI4`4TRI4Wqt()iVy9pE?w>biUh}Q z&zrmCup}DZhG;=aoIMo*-TF50Ml0gi@>y?n%ktoS3<+;&gb;EV4Y3b4PZuf@==!;I zM@jDuue!*5g54}Fn$WMkbFy-YnIgYW)q1dFYyytQL+#aCQLnv%HPA?!9YWSH3;)h^%ns zX7k$LJ5OkzW7Y1B3dpWb6Xb5V3$_K>EC_W37S(vi*%5N&u8~Yp3r>0bsY`wrj6P|( z(!QAU`wSUw4$?61E-e3P2=o-DC-F#wyq+DcnC;0;o_Xxy)IJYGoHWh z!&~4mQU;uZpJm}9L0x`#G@IjMZbiof_>))v*d5JTj5s_i`tE_O(Ji~+;1~a8lQzsS zQGRC#d8fylBz|=(HxHYLeV1rWdu*8KmZB+ty5D5s3(@``Ia}MqzMYp8D{NVa#%0W!ZVS`$wiimT>;%1q{ad zbF7<(m%!mZWqjlarya!HS#{CN^W&c<`sGHlBwDcO=)vz^gdgiHc47X4*u<}7-@Vk} z&Uo&{Z}8wR^h($(dZn;zPsXP_xuOnypf&146HIytsj1f{)7i_$1X+^$Xd?7NolOD;CS?__BL?Y1(@(p@aS`Km;JjhU22c)sHZS=3~kT%2fnhk4% zkrYj@4ZLWg4DI_Zm@}Jg1BOiqTrpH7E2$i%N>)58DVS@j?p|U>yp1KvQxTbIE+I-! 
zaMkX=*I%WD6DRKic{a+kau2@gn$!(aCs~b`+$ED=VdFFr{gpnvwB2Iuc zGUtT0H^4dF(AZkxYA4>Y`o3~M;9mJMaD0b@0$sw|9g(fT*Hh=z<4J-R0^8K&9YX|3 zRYF|cWD_2deHg5EUP^^~X3{_LoGwrbEU+t(#IOZN)@V434;OXI>Rs?N@F(dNJy9Py zY7?B0IvP{|4jXh~RgLd&XDV9gOPGOoR7mnR$Gr`D4N?xlMsH8)BLQ zJZm)G>5M;bk1_@soeA0n$r&D=)X~i(zTbfmQo37q)#i&EQ@)UY79O`;V8wZzL!BoG zP~z)rQgwtW`TDT;Aq1pOo2iP~8S4hZT2kvBZ(2 zS5Gyy4ktb z3WOt8U6HQtXNJK5fBZJ0X0;x*v0@t()Iz^~bPYovlA}5&^y%kC4RhNH=Phm=KdND~R6t2_=CYEF{G2(O8uw@~zz zY&?=rvlTl3gbQrBM#El-_jfKxD0t%bAUFc-t-n$f=;J>|b*`6I?42=wMFE|dH@<4T z`BBa1KzfT&Nl1_oA0-MY%h==e)G$nWCFO)m6?hC>Gx*#sbzd=$HJEc2g&R&oU23@h zUA#-FKiS}?%}h{yTBK%Sn)x=AA$^cNQg_mmY_5w{Bryg3nmy3E1tqseL{tpBh0m9O z^5U!Xa3J;(-+*mW=lXguUJ7T&*rxBNujgj!Kn$7yJ$WglbZC7lc741rW9$sq$+yae zT$7q1$FUaCH%UmPPp4K)@18w6`|A&tTBPu8S}Kd(Gy}i*FOF{#HTyC~2eidL95u-F zECt)=i_~8JP6W3RzWeeMkqKYM9VcV|AyhuOhxNt*Jr;m7P4AlREL^+u7J6j*2g8 zEtM-!@My64y!Ml<#mc&B1QIO7erNOJ(nz`U2t8;JqahMa)td!p(VL^Sl%CY9NqViD zXBUV-(+Lg>!0Vv?-}Om0hTK8K_|8^@lgfi#u}!Xl{yOFk@x2o_)J<5&>pRe^WUwCg zMU6YEI*P{Plc3O&5Ig@yleymdkAKX0_I#$C_M<48=44Kw?O&*~=Y1ZF0PH|;WV=(D zG0hipW{;-hks(#BbZ+=3zFH!Oqf+xz*YPE{3D|}}U{?I~)n1%#u-C$r<&$;Ip0i0G z%9XeCDHQRv({?6LY$fOZ-YHx^EHr~JZ-#e99nbLI)s3Ue#VLx3D^G1GBowV-#$vOY zL8(cD*TH*zT(za8kn5R#U)}ZE)+3M&q1sShF*|ned_(r?Kw^Z69hVJp_xv4$d2`cQ zH&8+N)iIYLY%dEi2)Vcl=XdZ!2zAg!go8K}T*l0(kKO#EsmZzuu zvT#`M0UF9``xFt-tI#0lxTHK;EIe_Tn@70bL12g6{H@XnkxR-q0@4HgBFk6({GdVV8br#4j@6`+!92DfW@s&2Qu818@&Zx1;bY zvr!U}mqo4>`YCB{KJjF{gLi!(@tz2{lMh)&QaC#9YfdG$IV7Jh9q7F;hbv6%mvs-}niY z)$AGZ3}PF9k-tFFryVOjl|Ms_!RPcPB#51%`$zby4`RWoZl#1f$lv_hwK* zTlUKkqE;OzSRf;7ILWIK!VFSJY0IYpECa<2ifipxXO#6Xf5a^&(j!zl(_zz6wuCpF z!*W{bnFr7g3X1Cy996^p(LC00kOr(toLH){GeA&iEECW4B(s|PM&?tUmTAh~-H z`25WkpRz{N<9O5cyOu5WdYt~gaIc8<5rxn02J>qY&94~C(9t;glaGd6X?Nj7$n0H# z6LaNygL|zJB+8LA4(=XS?5!mkfQrfF|O3p07J-E79s6LNSJ)(7Hjz zuvd5>!0$U445$r$p~3m8yi4gn1u^W9GqTRC-h_mmGTfz9+yJ!nynNAUij8oyAn>Nx zZ2uYA)C2fdZs1o}wto(~_2XD1`i#YX(PJYmyV|}(Cjgf2Y zy>O;Pxl++57~MBbk|8Ei=+%w4_YxTJ-D}AdnQ-F%qh9yw)l(bGQ-Ai$Y$?usP59d3 
zESEEx#(=xk_=)4|Q%kH!tIW{*)WKcH*}+}Lee(Me99z$|dSw-7XTy55s~P&)-BVSh zChA;gPSq;bPluo z3kKGEOB|6u2CnZn=aWkdA}U>=(O=~?W93T=ybUs$7$O8CD(eY22wgc^)i~@UQ69!y zlIziK;sDb@I%V7N55;Z3f(tL$HA}eCT-es;36g;a4;y->on#h%BYo9x{e&@XN!|B) z%Ry{sHrH^U&{AM&>u9=j|J>&f9x~9oDY3+Tf=sUKe*#=8Sw1l}Z0P&aQ8lgtIY}K# zAP&~4DL=YFfXhFn(6`AVf@S-~G%J@4)h0fl0$1t<_Il$!Sy za55$zWyOYT*v9`u%uu0uJ=VcQz3z@NnrCmauJU6k*z`eN{2um6lKs(_8=Zu<2o%hh zN71hA8(mfLQt~s}OY#47zCCaAU8+0v&-|Gr-BusHY~H&D9Kbo)3nnAty7SyQv8}Ip z!)$NXtyp^SAv&{F#p4eUfPdaJ5^FPQlwO3ed)=MZzKh>Ln)%1T2{K#^f7bBFRG69r z?Ch)e>>lBbHcD8N0N)xD^XG=Dxebci=j3Z_*X*!s{4&z6^Nfe#Q0TD*rL5V1>nOab zPOGlb6K}kcDtE9hOvT>m|J+Ay^L9mPKCh0+pgp{`+`{($ z$N)C6PF3m07axVKXunb(AXsCU(Z4psuhr3AU2-Na-ae!GRaNzUepMnOg0SB6d}D)2 z#3wuC@}2h^TB3?xZj$^(jjMNSzE}dXoQ1{)d~vRj*fOy}p7eLuG~Gv>mB>B;;m@$daBCM}%v8bcnHzgodphs# zr{p!LVSPG2 zq4C06Oa8Zs-hb})u&K6|1>|($5a-sCex=R-a4+oeo%9m41+mB83Lr*h{QNiKfdfg) zsuU)OS`McbkIp9X*_m$775t!$|0~{jNT+5<)n;=`1#g)FW?#z1w_V*ebCN( z6YNa;PmXbiY~lNu$jweit!`g;3wTlrd_#;meLA4fj-db1)`o%3x1RzoUzpSR>Rad^ ze|}oqZTN+Me&*wuo>K^zw)pvG@@0?3zy1g43eZmkn`$x4RMV1M7dF>tufQP2MepF9 z)&Fh<3Vs%bcjFrk240DWNs0KY^QLSVm9W~2)95L>i6^Y+OHFU-JasrpaDDalk_Ww} zw^cQ?=$DQEKR9jI&Rw5$?XJlAZyjZ2M>#-G#DUSJb9qZ0Nr+AZiMQhRf_o{d1n*Th zwi*v*mAvUNn+DHgOy2PiHeH$qoBCx7(B{bSaQ6gv=el*g`(uS*s1`B!0a?q5C;zGi z<AN=rVOIw4eRX~_R&5N$y=8;(rKflO!xDmhB#4}YRgG|1lISwWt-f%%BF>mTj`uxg zWpx^kBel5r!qWA}uBLx=wwqpAVU)lCpTDR2Z!aH15L646>-4jl+Vc^vSnQkP8cv(D z7C3;2zr2&fU0@MQ?BpAACCtOIhjLSgzhgSqp)zdW6W>d?V(p5vRFGp@&j7B*$pE3& zG4O(T!^(){cyLxZoLk@Z;(Sl(VKmos>vNwvfK=Obbxj*-sK?SPGsi(z~;Of zByca+mhu1_j6N2e48&Yxoh8{~OPnq1CRAa2R>YDu=_|w3uPZjjFZUM78;%s$JuNY_ zFiwSQBmK{o$qlcHsrH=d3H-@_LMpr;e7(PO2U2l`e~YI6rjbRJnB7rqD!$B zB|UoLQ7JiDFenx=ibo_DDThAxXfrR=)DT0w&AA~Y~-wS0-Rp+e_UF)`}UYvw5te)+*lTZ_36i$}Mc3xNsVRc}n-|9gu zXy*GZ6f?Bd5nPdz_pUl^=vNfM7neF$9y*pk{_UoV&Kg=|Vv+wE6ko`dm z_z1*pIUVQiIjd*7Q*nrMzuNnjg+!qaa&H;BL2*wvkn2q6Yw&41!>nnL2D7cfFUt{$ z4Egu4taQiyB;E^!RAy#g8_BE%{Eg+aAzn6k&*xoJh0fsggHyY82Z#Wh=0zfZP-2i% 
z?L^?rAn`i$Uy!U;%L*3eY&QRCjxqvT4%bM0CnE4^nr{l)_u@Ht2}az6l{}Ioe`Zn8 zbqL#+GI2X`5Fi+xo#pzgjW?WtnX7Drh}7$9PRwG-RJzrNflia(YGpNaKqvvDX5N^} zS{}lVzA(SMDq~=+D@>E?F#=EU&Z$&zEO*iSP|V42@65&SeU-}2(Rru4)c-t}Q{cUa zAJ=2fk2u3*{}$((7D#@r(WxEgDUEI#PF!va6;ws_y=-244^ibKR?6;<13&p9m|e5Y>Y}ywfRmp6B+juJu8MU!+9!`rDWl(B4%u5a ztB&G?O%`sZs4-+SJpF;?3+7zqf^ZV3~^R^ufBiZ`gf$eSR;Egz1MDg1#-P! zST!63Q#8UQJD+=5y_~|kO(5@4`NIGt8w+QplQJt43}N=uC0n~_d-6&H%k$1v>UMrz z$oEZUF+{?-p|6g$KlE62_M*RjAU9@O=|@MLgqBH(skxrr{r6`g#2zA#Wv$wdIw7 zY_h8Vm$2*%?uz`7_k-+^rV)uEenyGnv%AEc*cV3Nv-5Jqn|m)9Se%@%d^^gi)II3u zL_PNOg>(C4)2Fk*)E(Y5%@^Sz8N3`ZvPwqMjq-DpWYBx2HgW#l-kD$3HSji1*USP`x!vwmX8eYIyQhLIlNGQ)<-RZ3#$kiX)3PhbfGz>HI#hwu>aIYUJc4=k#S|W$(wQ2O5#3xpqQ?v zm6HOL>vs99qwtc;$ES2dT+rux!dgsnUY(pvM}CNV``^lZg95abawTd%qg#gmtOY>h zCAk^QbN}bfYML4wY2~B+Mw^KD=BF=-FkfH)w-NOnge@LzO`7|BJNz ze-VNH@1DiejR!b6d6$VYo3^Ut?iR(uepOLBmI;lL>DQQ1Dd4sCv&hB8kD0|oC)3W@ z-aGY5+B>7joZj#7mprG5;yS$ozavU~2(qHMRMvlc#M$a$t4i|0sde>7AvK*+wBhCn zeoJfyOR1^MUrQPNKBLH>kBvL{Jne@t2Fb&uWI4ty$?hjj$uWOzeamH_+YsmCSUb-C z5qg3OArh~XDRSZQj>J`E_vLKMkaZ%usdH{vv85xw!5%c+VSpi4&m0vhHEzRiPDGiY zG$$VInGJj5v*c8s$X=27r(^L4^GGU^3l2zZHQDbumQul?9_$>~6tO=BgC&|jsu^+x zO~fqirfRhvVsYK&jn3UG^`Zrs`R@F!hXn~l7KOaRAZU@p9!zeBf4?$RJ zTzOlqIThYlX%ZYO;jXWqC9ID-Ud3igCbymQC2n|1GtmOt81T4*0j|{Akz5dtRadU>dT<$4Tg5i^yJXX1rKao z`Y<9O(_;@9LcCC|6AjBF^CD#41jv|PdtH|Aux1+_&>K{m*bV~Hs5E*pMH^hZkd zT`>O<(^ER-p5Gge9O<>(HU$~h39VK=YfptGy+$w? 
zP?1p!_y5rU@DL-uK7Vk0dxn+M*2fBT!ohIJ6|KzelMwew`5+D2F$S^s%Q+J|%g2DPf2$tMgh(u4q1c8YJKWymA z8EiMnXC}v(8)2&W=j)vFf;Nxy1LKU$>5P|KYO=nKGML0C7sZdKC|_FvXY$=cS z5ZLAw2Z8OE57d8q^55B}MAtBLalSc+adPk9Gpj_q_32#pY;~y{Wpd(EN*`CtWeV=B za)^yD`uQw9X7{(`p_#?>82RIM{4&v0{#_ha|+pz-ms+>z&%i;{xZD zL8a2(xXcHUI7KwMzsIPp@T$JNr|1TTOnsHJ8+GiBEip?d<)5v4Vb}ObUn4UR$;Pg0J z+fXTAj8_ssgEOQ*OvQ@hEV2nEWXN8Hrf5>#8Y2gDU$lH)($;i+yCW^#`W>-*XP~2J zpo#oMYao-ApU81Rz1H$C)Ud>A+uvDmwY0EgXDRA|Dr$9rSCqLUXZ8jpg-a?ziv+8e zGQcj8%PtY^#+vubI9K;5!bN`67frDNU#Xv$2?V^-z!g0#?mY3;>|V`75CA4^JX%OaX%X^?Qf38NA3b zETc&b!N?0vpgiXT$7757q-uLGvH*~Bhk{Bnkf;Zg`%`Uu2KEC$_#NEi-j)E?>(sOv z%+%gLc>;q-Zz*fSH3Wn);wNXi;%y%+#qeMrh`;hky!>)fH?AQ$GDSCWx&K*8eySyX&>8C6!_eV2h zx$X=xi{xhd+~1>~MZ2mN=Dm;U&_xDgi;HS8#QSGsgW81VXq7#kkM1D78O_uTTeZfu zzmIQW4@+5H{a3W@F<40JgxxF*U}iMc>M?f@L%78{r;b! ziyxdc&xVG>J7f*%0pkz>f?f7mZ5;_C-91U4!&nc(BK_uj!ZjD>3ALLj7{VrWS9JJZ zY7SLv3r<(mp&u~;?#Og^l*MZTsf%gfvkw?rBC>z|-#O$db}n7+E)~{^UZJH(q~yTO z=$?JVI7=;OWDg_4Pg}PS@ttiT1$`D_4xlngoWq8LYIhg-SVn|fxCTZ~NwvNH(U}p~ z_WHVnj>Cn=CZ{+E*2n+1Yi@Pk6hZgRw-|3Fyac4D11x+1;=ed0AEmE{UAYg?^dK){ zbNau|vi?s>ddD{-LFa26wjMc$5ClW=22)kT=t7P#>oS~PEF&j}*k_b5E&MJ}$oh11 z1aenUAW(2~R6t^g#>@W6_Y=h$HGlJ>^OVH63cJ_cMx<6VgT+Bo((a<80x58Vf3$UW z>W6w$m+gV`{Z;vZ=@Lj6UL+9PUcW`1)y&uKxJk^muF9zbBas_S2sx~6j&>aSD|rfX zv=oHhJ6?0n78QpFdPw=-eiL!2yj*86tj}6!vq0J!LQYU;iavx9yympR>axDN|CtZJ z!Y~Dqi^C4-51xzL=O%CzW3j6azZ*-C&*2R&a5d;*s4bGcGle?@b;bJsuivVs&(8H0)s)q9}EPCY;TZ9 zOJr*6VU2DpkoCF>R(4&iN4 zpd4PgbDx{yWbk+4H4h)D4;%tJUb-*=U(HhJB=JB949jZtC9ycB<#+#v;_^P0soE11 zfjfJaEpdl?zl}qoK;p19L^Z8?!rMd^v-Ig;Em>lA>kdtx+5Elbdl8`QCwx8tLfg~R zuWwF#J?oJnzyirV$6+XkH4l2(85g!f%X?7(0XUtDU(U^7x%{lrN*TKr)0+I|ms^Ed zSR@4kt@Xb54m!HJcx+~HTv=7R!?(|`C(8}t2L~3}ik2-gbmT6gVOXAx1jxb5e}{an zqNZvwFaUUp15%TBpF0_e6pzja{Syhou`U+t3dUBk#gR`*2&F_TF$QkFYO0IrhieNC z5D!G~&@2@5ecq_x_X%4#raVCBR;1_f7B(4C6uxN@6`O;%B#JHNNs=5gxz@ZmSar8= z(psk+?1*f$V}aRP-yL$|bB5^7yt8@b{BBp?XJn_Fa3-6Mb)O=m$Hjq;f9wU&g?2}h 
z(kHUiL)RSUsparpu4{_P9C)s3m7-?h;smD`Vj|1{{kGr&AgV9_Q9)0DH#&0BjQ#eN+j}R4qDCktnmyw-2bYDn z%X2X22Y#kUs!AuuC-z9)Nd8!eX(oi(*@C;Xb;RCU;{iBCVE29V%BjcGEpVh-zvJ&> z4aw{C{dsd82-}#nIv6c<0$IS%H%WH;gcppu*rH+2rh17=2P|8i!!&EJBEMBGL+fP^ z1=FxOH$PlLlg>JX$0KUL?53!31o_VD5qDF83snt}IE=g*6MMeaLt#MP7iv#*QAJAk z#OF^&P8v?sf0J9a;0!mf-x|?wRS@oe6=6Uk@P5&RX&{~+)MAf+*3@v|#%+}H>?HvI?rwLDd&1-l$^PB&vfo~y?yc|=<%uTYHH6pfQ;P{# zh7l)ArVXv%6NoOfg*Yi(yA<9tI1GgmE}oBTDvCO>|Dvla$q&Hh>L|;T(hxV>=+8OsuiRn|iFMl>4vELlhH?LeKhf!ml*pmhaTTA- zuEOMY%;Z?7D!~iVS(D!hna!vKJg;*!>{h|~=@b5^#&9i)xQSJE zXWxZ}Zr<2NIi+3dsWAL0GSG3F;u8=o_W*>&!jVrGQ2nJr208==X{TXYsKBr%;5Y+2 zk=Xp~vp{>(fHWyRHzZIPhwXU?A+tZ*?8OOOTbr))t-&PoKPY+cgy8kwEsg`B4215wz7GNq=XAdCZ*|2ONCW7qx$K#%&Qo->hrxzm(4AC%jEw%EM z63y^k`|g6{r?4HVw`fuJ*fu3as-t#!2Q78U?YsT8+^(e7_m;p%JcrK)?Kw1W8^mFX zb+J@u{BxtY4n@^Y#34TpnEsBpQ<=91p5Yl6J~VcY){J_94_F{vkzJc?Gi$Y> z^|HfK*i_)}%Hkog;R&~Tgs`VaGz1UzLOg0Frwu zZ2_jHa!B7Q_@5!^mh@;~X%d8E`uZ81~vz0wI zMy4`NvsiOj0~&NOhNcC>sOYok*a5N!j98W0j}Y{mH*CL+hwN^4`IojQd`Mzj@o@2c zx-8`K1sSrSGUa=FSGaF3<7!RB8)I!@knL?Le}B5+W_f}Tn@fNGrIutDVDnkH-|`4G zpZ^JB&Q~D_%&3I?-5I;)QtbWn!J7$;jAo2^sEX(^o}R;yZsYgoP4^vQcZxZO%rU=~ z6j%~Qo+0jX9huB0IM-Q|d=E&uyr}}O`yE!Dc7nbxzM!{5@ai zMAwB*x1cd?NA%l-urTkV`=^0kj_Ee&GEniI2{BYZ&*ztn0Fqc(0=mcB5W*71$H7dS zrP>#Gu9qo=F?-ivIf)9B3qyAIXCar0Yukj#1`A{kgbzy%D7b_xf?5UHJYdXRgpgH% zBmhm@%$#3+OwCPariPF#gJZpjU?`}ddUP;=QDk?3cN8qZFoZP#c{;2vw>gRA0I~BMqkZr>*A4p1sCN&ID z7+h)$4nEEKs+10m_+^5!o_fCa(&%$C6XeV4^3&UqAo4+pyBA9Eb3G>8Fo9QASLeG2 z64iSpDLqpV@J%q;GZ+hZ(IUa@TQTEM=Q32>T{F_i5Im*6VDNVW3>GWH@YWs?g%J2E z@VirPvXx1!MJknRa4iY~@At{iFSvtx$&&q~y=Wnfgta4ynTaB*a!6Fb&llZoiS_4{ zEO(n0tel;p#d2R)ZM-^SqEtuijUAGxskV_}ck7$y^X+g-5}ZvT4ob``y*s+zJKx0* zgxEtU8)Ay?8BS`iq`(GV)v1eD+J^0Eip{w7GRHJed(ZfUY=m6OT6F`aUI=nVm}+oR zhX2qxYgzxa=*jt>RgyH@4Qyz+rep9-@RoydfPC@IY;|(sQbozp)tngPbgaF(V`B(U zOKM=f%*{)3w?=wI5gf`yZWyx^Lo$Q``Ml^%FQ6y$<@9?7s8r4I@^AG1l4JI83)4%W zu<)CwN1mx|Q`E0C&A9_j+D>C+ZKbzVhJ5SsuNLo>>A@w}wVR0pYlddclM7+oD$RCW 
zh}^km$E*_y$NJZ!gb2m(VQBf~7kCDaU|Iw0(|WyKbB7mpnc&;h8}{EA0)9L^R0w^N zA$RW=k2yfXv^(bzbil<8d_Ohu!#KG3?L^P9SZDl*AlrT>!rPtmjp5z(lWGrZ>d7+Q zZ?%b?3^}^TXLbf6H|9BNb5_p)Y@?lZonFLKPVU!&gq_Q_!k96OvlN4Yo#C2?@Ch9wX|rF!5CEz6>0gsx5U8 zH9hI(a%);8mI)rNgho4?D$*M@N*Ar}n8X8xy7e;0wYhcvW-!YXK`oG#fQ~B5^@yG% zG#IZ%9A%e3h}6mT@2_}|kS?RGHsl$nOKvR8@$H+%)2lY@1TREFdIunt z9N+%Wl-=IA_2E>GxP(NGn>8P>)LOu2WdA*{t0O-8xcmC;9i4~W1txpjqBds73le{Z zr#Oi=GeU*Kgg%y)nzHO@*p1r*z`0m0!z}}d_i}Gq&ooeIkL{}Kh}1++9$v$3Nsp3| z#!sT9Hi6U-8SrAq4IhJ>pk-$v#H|qqlW>rN5_u3HiN8}h$UeXKn+oDPf1JAaL!nx| zH*RwUU%AV>h+f;GQC3&uSRW(<%Ag=K%26XYS3&i3cLm$TQAF8%)=R5dtW4jDh*rMP z$QdS5v#~v%=3uKn8g;HlSUVN13LZrdpIWq;JZ37Lao#$6iVV!pmz3&zjOvExh>D`) zgP78Eb0Wf&KcIEe)bo?g{rUNKAIAN6X$;u{9K-l1HRVYa@jvRG7mZy!-Cb{s0xWN+ z%&%LPor|a3u%}e712x)t8hvu_ckCO$lPXz9a;9N0qSv!bsYWUP7j|ODmhM|lHdGs{ z;#n-m!?BZ(+g?J0()SA!fDP)bL_dv4A@Mn3;JjzTV7QJrY5i~`9 zlD(Db9Kw;q^1nWqd>nCmLz|Li-{zh-_s2=|_~4x76=DXu&Z&fskE_Pci~E-^9W~mF z5t3=mNAjMTdyzx&<+(Sesx%vuEkbO53NEu*U%z zQ-dH^H}m&)){mKl{9L)G9-!Cwd%~57J_ZYIg)xh3{L9`qmDsJrCgtOl$UD2Elgv9S z`iY25O6OxF(FpJ_!JuP{AY(*L7%4?y#k?7MU!;k&%g@ zJ^Psc0VQZ6SOf%!y^Dr`^$NME)^&&Ow>Rt6z_`z)Iq+oPY`rDBF%^izmZ~ksCat7Ip)&teqCp0O<}Ka|x`08zC3r>7 zDO1rT@Dl?gN;p@bszMbMpmDC)T ztV1QbL~@SeNM0BE%{_?W1JD=~>5{P$LvhTMiGTRnM{(h+Yss&gSNg2X)u4X=9dt+| zWvo{EVS_n?#)#Go4sho@d^(!e{---Zkkfup-2R*2ZL z%XcKx39k6K4cWKCqk|{+TX#WZV-X%J;Lh0J@ws(E@#d394sLa6OTk4a3We%Z!%GO9 z)c=@9ac@tp4u(}pJcuS~KawaV94rs30$rV&%Ij`QA?>^IhS8iAYMCpWjFlF&FDv&!VIPl<0OlBVz7Yv!4!GRywIP)WC(sVAq`70sV z?qtRCMaaI{9(L_1rE=l`d;9^%X*p$5)3!BsAx-kNi}M`iSYXJ>E$|l;tozF6h4*o{ zsJwS@T?EbuS<_QMC8g<^B7#R7%aq~JfadUa9Oc|(oQQ$bzwdiYGt^)_DhluQUs17(0(H)JP`g~ZJ~d~=Aj)Mit>9Z ztjJye>4wqmcTD1ffEy(FIDDm*ssi4tqmk@O?2FN~1U6=@flwKF<+$sXkPTanl#<#& zlO8;|{vv{B%z~7?Bf3IG?(K>zV>bMwzsLoA(--;PCP$NKEJAU|^h*x{#m zoOJD$idUt-PVHP}$q?2+5F>1COdPCxtoSFN(iOKtz%?bLTlH~GBB+1sd=sJ5DaBe< z&`7Gp{!(xw?fiTv8bMD@@nJMzIL`ZeoU?r&4q9-MP<<1G_thblFVbHB2zMu#W1)$w zV=BAd;bYI2s?-FjsDL4jzcdMW1K7Uv^PEDf1|!@};XnVz|I(FS&)LZLMFkA9y!=aJ 
z>;!)@GbiuvmYKe0g)xJSsQMHLn#yRnrrS1k0eOE;Uy6m!1v<@YTkbrN1C-6lvd zLp0c)S?P@GEq=8xD%a?K)zgTz%yN{hexCVC(5Zr@1L4&(%@ZiDS-6gmOcZcBa?hda=y8HC6PoI1H-uM1| z8G|v3FCSH_YOl5CoNKSWOFh4^@TWzb=T1Oje}DfIEk8VioZKR>7dJa0Y_|#%By$r! z2EOODR6}-tyq*+AsFns=sstO9a6AK~hLe6`6|!=1Lqmh|eH6+Ix@5AwX68ndb}@H0 z9119E^7)~$zSpb=z;)2zcK}IVz5(?c)LzgJ{5N<2AMSwtFF*LnAo*zUdsCoXM$SVP3^FRkbDVqf57c;nDswiseV>gqxI+8c;|Ucs_TRHqeBlz z!JqJ3*(4|g1|Ulgk(S=|^S6s^@JytPwpUUi!8rW+%Uylo&KvHV#&h}MdV9sHlGTZ0 zP)&6QX;HPuuMb`Dtig?Kdsc2A++j5ram$N%eR#A~;N>T_kd;$=M7mOs=)zu5w=_OP zGa)Rg_QcGUM>^I8N8n}hns>5|HY7ms*PTf^yPZI=QfeqxAnl_otEL!JNP{( z%(~BGDT8T|q5?m7<0XgzEDywwKryy{WEjz39mJGAq!!@g%NP>oacL%KZQT^%acL#s zTXDqIkr}Xv@mpYbM`R#Qch0YkazIGFyLc~M{ev1wZ!J^;3kwI*2;cDC#_7+^q@Ue6r;AXrRvpl75xtrBEd$=!N?DbJF7RY<-)wW=Zdb_f!B%QY zJZ~|?cTllEUi)9!@4hYXWd3w|T^LMy#iyO-sBT)R!_09YOzLw(3YK^?7ccV z?_gmgqv~z1jTzyGz#wNy#u^ganS1-b5c5+T#;p;uvFwZVW@LAfr-#n;ajP|A2Yki| zhY?t%VwDd+cmj|l9^)J^S%Mpn8-YSih(b*Cr&(luF0a6k=XsTQkPvjIpL+stv&C~iJP zQYIxiIb z>TqOZY(@4JsRQnr!}2gNE{(@+2fw=J@2TdXmMjgv=i=-3aK#DOYV_m2(GWuQ?L>ad zC+ z^~MxLi&;Vx9`!?3mJF34GyVa~A}u|a;>0XVM5EH9PG7WQ9s5abi{ErgI4`jxtTG*J zsY;rA=(#T6`WuO=)R$r1Aes9)T(Nd`vTqfJXl}I0dC7GqKL%=&q9Q92+uSoT?`H>U zu|!m{3|KQD7uXM^_p&5+3RCLbO0F3?RLSvMy-gHOoad%T!GyXPSz#nXV0>UNQzw2Q z%~`V$XmQE69hEqoq;^Ba{j}BR?vsxk%kMF$T{o#BAf$>F>r+{minm*_jrnwwllA!S zH6DRTsEoduT~eMrB#=?Ua_3Nqr~CRyy0A`CX*1J1r}iAP)5FAxW=*=8ahnv$DR~dt zvIsxc0{W%Bi#|+fU_w^Hp*)M%PsoujjBucU~GDLW0CD!5Ec z$M2SI@hv@nIrxRpzZpLwG4DiiXljPq?z%94$JvBZW=$20E(39a*>P!g?v|_E+?8rW zEFZTpBzbi@oklDwq0O(>p}xWKpm{Mnb=vj?JFL~?HKn6ID4lJOfqC$=g*^)wqQcdl z0lMxELsWywd!!{3VXpn$C7rhFb+OdG%kRn6u>s^Jbu8dpPH2uxWUNgZlc>mY^usEB zQ4`EJiSZf*PdhfK0`Y^%q!!Z=z3ODlk#J5pm|Tt!i84oi``t4*g5}ndhi+`unFJUwgD` z*;0n%B~vxx1jc02h>RZ8QXiKnuAcT7YET_g@ZVNVpWEGSPB614jclb^khNk*izO60 zGoxL0I^aKxUFCSv^%#_2Ei`4wbG<^9VXrqbefsjGz4eKBcTvzAh%Tb+`))wBNk1+S)n3*Vbj` zet>0TuvZQ34G`jd-vZ$WQm)yG72cugNI?z702s;Rn*v(71V;g}V$=OTWg| z5%C~ILpV`dlr&5r0(oM%JJ@sQKQJaKw>CMEifWue+YWDv2BK$TU`{BVtw<8o^SMSJ 
z_9Ti}B$11tH=nSFYb{v5$5Ey=yn04ufnq?I$s0Mn8R&|;6aOdn{2`?#=<+#Is5?){ zjISWF!G>jb&m+~<3{D7NM*zLmjjEu&TA&{sDT>#PZiJ^MCKH=}sVOnfvpi^1Bu#h! z^9$j5YDnnukhij)WU-D64k=)zA#_LvqbDQRyNaq?RxT13- zq3tJO;f)y-s^4h+eB9KYKxA{pF?u5V38C}H>{0FcYK=MWN&TsRVuI_ojhWuKDmQV` zTJxe}*6IyWzyTAiZ$8Pe7&!;_kIGj;!`Nca+ERpb)t+4QaLi_owB+_K=&H6?XA60} zs43%a()Icn$wDxWESipl-hgC=r$Q(dIHN6{SoqKt=d1uqH(Ge`)ESG1Z1SMb|Lk;- zc5)P#YB9&Zkk5WG2IF=;hJ_Kn&KEgk={P&qqaHY+9;jX&`y)2=NT8#>+m)O z_h!<0w?{rLYNm-cUANuqd68hZGC3RrIPtX@cNYTul-=$!C@#WUF;Dm@JeX z`jCQ!hanR7sNi|s4nr`3k+riCGV3PTyeP4ROxXr^*PhSP72g@9x+<8GG81#-xbLfO zNzS*k^GGT#t&E{U+gfTt%88+qH-~0?B@x_X-b#^z#-JZj^=7w^F`6i4SzZg_2M05= zqRn(D=R0x~c4*m1<&P;%!R;-Ng*mk163Gj0S!kxd-bHgz6;x!lwWiUm)w~fySKvCS zt+5wh5{3IboADYWg||LJV5j^##J7X}B~vFFS{o6a?0aS`S7h5yPF@>qi<}A2!mi7r znuuCfG|2j@G-58#;6-E~23+i6Wv%4AUny@JJqBzWId?BDxtxwH`R(3VosGeHLYIyM z+~>-PS!eljzI18Y7nz?)C*<|~C@R*k?rDn~Gemx}_!i9LRCk@HZ`uL)x#S3E&JsCes z_^$DxSM#M&Z=AEb%jEe|QH-uX4FagS8}NzV-&<*2aH3Xn0{V;}%qudwEAieupO=V9ZRi-~E2&GDn|J+n6tFr=+l$)T` z1Yy0-n54*SDTL#v$mjW%7tz$6TQT#2fxtsHWc5l!wUH_+q${CVBXyj)aIVRH*#jfi zk6Mf%i**R*NeSJ!Li+5y@d>g2vM&IYqDmE*NV1|&%iOZujFvMlet5F^ll&kihE+0G;CrnEhahx6w&1&*WKi{^telScy5>Q0NMFrLkb#ct1@}+jY zZI!XSyS>bFGQ57#berzRn;d2jr>qBHR!2*n-L_aXH-F)@uQ*0rpOIC{I5@t6(DzL48)?moOmZ|<3*dA{H?9rsl8un!x2FpqOC9_? 
z55=lP_d~at1C5$Dy0vgai?w=Kr2WbqRV6lxkuY=+W{A#|cR>r`q=JnrNi~Z}ER02O zvbto<*Ix2moTy_+g}k;`H%_zW@(lD8;HZ!H^^)K@JVcz zOMXX=Z`bDjOHIx;3o07q6f+W)mgMJCV+JWI3|f<+rokB%jTU^2PZG{|w^*1m&xQA; zz|6*s99LHAJ6SCZbs*leQPcZH%6^pncN$*xN&3Yc3`$`pL$s#tMJJOVPqZR|U{Wu&Oz1I$t}mp0 znR04t8x6s+6H2EKS;iDltfxE2aGYh@Qiiu#MSbyRAB{cwVj92p)XvdeeTD}cJ#o9j z4Y!~?dQ?M0vrPuGtcTN1FIrTH7Dh;G^{Il9V+d!>GOP!4(O)!iceYZNj--(`u9jx=z}Rntms4bRPZMi^k0?>?zLQUBO~46%)i zVweqdaN-(iR`TLf5Gt@`HMeu?rKXEcCowp&YCsd1%N<(E7f# z^oB81|6#|uGI9?06SdAc66<#Y$@nRCF;9IKRVEq?zK|Q~jnn?)nh0%_K{U{M2o3(D z7UZ|?EsQ-4)Wf--TYPBiaJfG3OxP1@V=yEX%X809B0DXfSwC+78OnB91)8k)qtI%h zaB+DYs+7XTZ=>j`EpSO4G@g{Chz3>O(8MdW9cEv|?^n`i13Ni#4~JydTg~CIt4rm` zhA2h88OrA+?-EIg3m+{sELMU8q*ZBDbgc6`0Y7Z*IN~jF1)>^bJqD+sS?wLOLZRvx ziosH0>^p8h?WfDC2e}ZoWx|69d&MCuY79G$2P0P@S@Rv$(hHkBCm_grTEoF^@U0n% zx)g;ytvC|;YF(XX7b9sxbbVib-M2K|jxF#7a!Sr!u#jIz4dD=Mv!Y?gB%J_f-^*Yw z27_`g^AxSOXBaTbt2!G7M{U(u*lMVwr&w_J}ApSX> zQ>4n#;hjekko-`UPZ`{o9NXQ2gqyXa?7RrI8mNH(-~st9DC)tHH~S+_hj)%KQ1pu& z6qKnrR5_kpp{6Ekg-0Fg_920Ew8<3wUF8aVM`3MT>|w%+y>h2RyuW0fF0D1J$2Zuv zX1}IH6E7G%do^ggkqX&OWArG)$Z*e7=dMGKy}FOf`lyT3#alk8JrH$G-hoAaB>Oh^ zZQ02$0@zVm;M1YGdvi=~3+Abiy}s?PZC18Wim-nD$5y(YOlPaKG7;r-7@X&_pL@?K z-_F`0Q(ZcitRIOnPBvts%6+LRGRm2cIi}4ie;R4F?RiS&8Gt4uYZ9xCrJfs|29TV{ z=mM=r1NAHMwFSG`(T4KA7N+0eZaf$g&m-Q0hzr8GhdQc;WY_uO*fQ@9&F*g@ZTdl^ z-v&Lk1WjIhiQf^Mib4JOIrOlieEWrvtoUt7yoPhrjZyDI9z2b?IQ@tup)>oh<)zNp zMOs-{%3jFh*_sJ1&aBMB4<%9HXxZ%wjA#|OR-K`}?w`U4hKxZ}GYiRaft=6yun|G`!)i{+ZyvKq; zj?~$-qTToH-B;~wygpOca~l>7#|(AeIF0N5kkh zMtPUi6%nmZ_Zwe!ix^nG;)gxuk08X53tgIBz{})g$vu>xpU9 z1cg{r4Qt}4T>Il00238_vBKStSQ~uJcmWJp>P`MdBuN-)i(%A-D{Jc9(=y((s5G;_xG4eSkOIrLv|^9Z}^&%8ny|5hP?a&nCOw zIF<(iCcel-qIyO;!8YscC6UaF{^p4g%c^XJEZ-~enj2FW2io82h?S?mUSJ>j5^vc@ zV(Sl4J!P>Kjas~YU3K2Eg@f~mXk!CA`o>6y$zhbvt1SM2&&=??eYgjQ<(~JfKWUF$ zXXZnNPBp7c6!e!kZ(VRHFxFr7D}?A~Z-+*AvR7=muvfV3ymXO}(dIB8`#jZ1R_|IwS_4VWZL3PZC6vQ@ZquKG8O|XkLsm zSL@Y*mC%GIhX+~tsfgBFBB(}{nMv36#r74(42a-a 
zN@Vl2{TBD>DCyD!fr#Q|zFHI5kj~&UCk;)Ax<$R;+?*O#D7pZ=Ny^ZsHH&rP&e#)tq7b5HG1`f8>eZS+Bd1om76^h3(X?X z;+!J<@vWPbyl7CZrM`SA6j4~k)vV$#vb>0FRN2WzuhQ6&T#;bex*y^q9*ZMDsY2k4 z87}&DYoXtM)DX9#S+}nGppRq8hH-FzLz*F!XI7!o;69(GNGZp+dg{vFhnYNv=RGl2 z3`jnP^+Ho*{_pR8-~RhNnY0%6S3C{}DxkBov%3l^U^@OI9EeZ@{fW62$1*Fe}J;VDtp~G-8d-2gbH69hKqdS_wiP?)8L&~2e^}8n8@1v zg8K4b;V-08Ybcjg_{_Y*=VoaWMbm?q1@ld$7kUN8hL5BYN7H8bdVAPN^|}V!U>ZAB zhS3o>cPvB0I(?em`HXqaT-c6N^IT#ncNxddCYd!eG9e`~a7^>#@X}TR?)&ZeG3Q!6 z5@#{UDjIC4TFbnZY&fHYpFbd2$GgcQnQ@aBH&^5e^mAl{dCpuKVTZP!+@vHnM#i?Li(q z*k~TzqUlRVMG-*u*xpJz;fY%fJc}Ku{=M;2orvjrPW!;`eG&(26O=kS&+sK4w<|v=bE{a-x z9r!Kh#G>*~c}AHXX-tH~$K;o#9M%$x>WS{FXgGE=qzGbt)+CRw0>`27P12 zncS@Avx5he9lS;vsTSX4A6`*E%xFRr-bEY-FK#Tf_;2E9>2R!Yl_&J1j*#E8sWb3K zcFh?vB?-S5bVqGI!5nd)8!Fch^s9eFt4Gp+2UVmf&iBF9 zQJNc;i=C{?*hT_HLl^xoCU~h9F~%Sk^SzWp6qZ)e}tDS#{UA`zzM)Gx!fG zu6lKCl6(E40S;qEG1$7ZO3Jze@F?N2n(bNqTY4cDqC$vC+JOd_kH)`ZnWwzAGMuido9d3oRRQSEto@@YpJ)Xxnm@erT0YLp z$jE4w*o~IX(;Lqgo1B`GQ&XE;-n0|@-8qBFYNp~D$GD@Qq;%y7qZVz=qePUiP(sY{hQ-jcJ!b&NdN}SS^%^FIM;Ny0PucD6 zsUwtaL&oXH$l?m1pnHL3!}YM0zKe}@vQ}!*O6$1RuYGn=LR<6nJQCqr6ZPgocyJ7I zXUB1dJJIa+0xQ*|w#9zmmR=9`bd7i4_?oME4_hsaQIBPWFEOa`u=nPjlBVWj(PLlW z#XDtf?eKjwH2Z2TkU-3dM`^yP(eRNE90v4j$g}N6IA#;6+J%oac3q38RzYR+3L+AL_QH~~dWh*>bS}X#;bHRcwCME`+ zEtHJ)Gg**{u1X*`)DJdh6MN=KXZGt${(k9{Zd{zucAcH$%9kQb0e;%NU!J(gxH%RH zMAN}DXIy5`J3Uuu_G|OaF7HfyDEBT+fvZ?t8y`1)C+$g{ROUE{+JrH%7AlM*-dh*l zFA)WJJ__dM@1^tJPQ+v7SFGPRCi5RI?D|Y9ro8)nL4lQU1OZI{YnZmn90J{j$1biW z0ZsczpNxtYfG?AN)CMOOmeNy+@1N~2e4-V4QMxk`uXp)-x6kFYw>#KfTmT=`55eRK zO+4SOv*K@AK>?JqKP=3+k1VioPrPn0pe6RFe^Q;*>Z{93JiZahms9wJ1EfgZ!d`i@^1BJZ@Z zL`mVf*cDYq(6QKO(GSfO)gAVsq-5))f~u(`tu$L6%itTCg)jcYHoNstzs8o^%N_a! 
zv~AkMvYl_iW9uc{0m`vOr~>fxjPE4?_sW~NbV+-Y9gr`_WVw+W5%r+#U_{6nEoXbQ z@{OxUm;G1>j^OL+g51-|^6~ks>N|br=ZkfNL8R;Tt2;us*ST#oO%)tTcuP1=8x^Qj zH7&iCh^$g5auynGBg(#Ko-@ZAOn?v%1k0sc((yy~NqfCZNEvNC{e|5&L~u#k0qa(3 zdVwP`h|h98A-rhR&M@}z9N5I``q=Gq-bVo$k}wpv-y&&Xsk_qyBV@L?pi zFjpjEy3y%ckLyy8+-f2a5&Mgxo16xOHJPwnmyr-Hd=xo7%WK&;b`)9RaE49l=<@InU78|*8yHm z8}EHj5@RffSla2-*pDwHxNKc_7EiB)-njo4y zE5nC@Qd%h%-cDP+DITECuN)Z!0u6yZr7__lo0d$Ur)ppCW(=wiH_|nCH*&_v5r_aC>H$oSPhG>1>*xuuwyb-L&7CC0wAYCv1_{JsAYGTGnXf55|I?Hl(bmZ?>v&2 zdwHM#<8S?+3_2QhR$rGNj~o{vce}fVpH2y$Pq7uJ7*xT2HvtR4V|Mr>z?q;SI#*`w zFd;l)yTy>-H-->y3Z!S;r~JNwFQO6n+02%t+ip{jn%xm^pf8>mJ6?RK&H-@rPeGaK zg(ffgESbx(^rhyg0Uo6L^UFmh%iYP;mq*PoN8e6GR1Ojz1`<%{HW658IevrZ+dHhl&Nr#e|22>K1-yhZF49Yv426Q}X z_Qe#*cUM^%vs+rc|1iytA^`8kIrH0ypedeW>`;puEcJAijP~STyrxz zoD$PtXv9o1cpaoPG$vD9McmrtE>%J??YwY(sH&2M#vJbBe%#X6)6<9#7v8vue}3k@+6Vjr;Omf59%A&tuj&=4F784}qZzDjb7ZIAPVR+fbUm{?RZj z$m}&DwklE~){27J`vAMT}7TdTq%k5!{w_hW)bMfk3xc-9hsU_bIMB zIt*Tn4n%JiWND(}QDOSo)&gO_p2-_%hQW*E(sU`^g4Ks9x%vgX)lv=}a8Eto9;xt=s>ub7VqQ_RY=J-+vW7b0v4-k|K^}I zOdG!jq2pU%h4qLaI&Kfr$?Q^=0^6~Uw7VA+vBM|is(8OWe63qk+oT95LxZK^=u`BY z4HjNY=Tb3IlA6usj)x^9=7rey_{bRw6AtN+&=5PEwZO`n!TK@Se*; z{gJ!uRpDAR@6Y`G^cT3loiZ34xSie@uR|cc?HEb4uZ3=izC_A)p_Tu*6J~_G#95l zg(yGu^hFonSH!e-P&=1wNxKekXq};TX44aWzyW-GG6zNF{jqmKVf)W{c0Y_ixAb=T&b9qGTT_Rr(*_p{rx!Kpn zcU+nHlMk&=Gr-#SAlWtzkT0j^QEY=&&l%$JQr1#*NCX{C7h=hDzeFy5{RXRQIAt{< zM39cweb^jqm<`b>VWjA&iH$0(uu-?U=$BcK*O-w&C1>mbaJ*YXcu>$O48h+pp6`e& z+)W{?3EbOG?^3noe)|#&K%)xpvfS)H`SgksQMYE6Yh)c*1{5F$lYGisz1dTVJI?}R zSN`_klTd=p9 zrMdk5M;tlQvWiU^F6YhAj*n7lqs$G94MTzKnV0s-W#F=+<)Ine_Yg~N*JxU^7{X^8 z`409%i9^0KGg?hBHaC;-78AV0+eQ24Xl5_AUXUjc^XN}mPii5)?UA+4jyMBZ-O&dW zS#D0F}%tJrw67mSw8NUE1lnXYaKIY)&^6 zO0Jd0t!3pgyjS<>^>VhsmF?wMyaUj?Ak}48U-eLyNeH6APeqgq^-+}|dbc-NwyP5* z0cDK=eJ?pM(-YzMwB?J$ECuA^g9l$>p;i)jqUGk({BTv3bMwwBaVBPl<%-6p^Pac! 
zTr6!Qg&w<8QNfix7b2oprZ9?umzpEXwvS0B{Q_!N;HX+*(MT4{3|fo}AKbp<+1A%0 z?dzk3dfU$K?l)Livya1>@_^m}=Fc#7Gcz;q8hxpC$n+LR#CS-M^ieX&3yWN@PfdYD zBC1(ug}_Hp&$}LetZ`Ryq;VM&dNiGuH)n6auXL^`zuK6IK3PvaH>H^J+#`{-1ki;P zS=6_}Sr{c_0cB)$V8HC$anw5==-o>fw-##ggf%K)Rn4ofAkGv3hykcFxx87hfUi=j z`iws5dR1xg$QtBxdnE%swhw@o0B1@VzzAqH>RxtXXs~=&s`hU5f)*eu&||gS8$8eK z&M&p}R}qR_kU79cBzBy5zsu|W5pwG41A!>S8$Q7=xx^R9CGUXsHNg|RBs4UV!eaL- z15mN?@$nv|9|4{PL5wBCJ8uBsfzLQ>4j>AArB;L?+{R3H-yin_35UrV-c{KB%a~(=3w%K@bQg z(-QuF-`M$95;teaPGGv%C5hgfUtMrs!YI*C_I6obOe_haHGl6SJ zMvh#f$>U7+r=JYFSnNYVjPvkyBjIq%f^o9Cc z)Y|9txQB`Ax~y6CUwM9%ULtIY<)BfIph$}av}kN*JUw=3K1zEF3iMdx29J`4=OB|R zJCncsebDHL%$F*;aINE>Wk%SdnXK#kQ#MPSQYQ{g8((4;bFRZ;P4GNk_h9+=jWGcX zr`G*5>#{&{`;=IHSAfwL=y2=-gmA4H+$JY|(dlSutm@PPJt57WspP>^p7 zU8=eNG^9r#plG#ojPN5%rtQsysF1PZ_54rEl`yDJ`L`$Ud2Qg0_97b-R)CN&`*HsWE%HB&m{P0~Z=S(JV zQZb~_?a~Ej>ER%^1-6Xy(!mlR#gb+x$eb+glkdDcdy(Rq7`?d0486><);E^X zND}V|o4f^~QZzz3w76WC7TeX!T6-;lSJNp!XlyO(AGF|k5q=kPT>d&+5uEZ*z0BLC z0^DC$YHnrFY3jE$&|}(v z?b==cfN6WAC9oEVc*W)D8z~>!89%gG3m2?sxP{lrwY+|aG+)|)A}lbMk{!zaGtBYo zhbm!KusNY{E^U2M!ohk8-~2aeZT>_>PkZ-Q{@;RC?xZN3jL7|r3}}F*Gj|Hnp)va& zBgfAyczV~a!Th4Ys!@JYNtQLb*}kwx>4#+90M>xlf|$vs?*?_=P41gx8?%NG_q_?) 
zmbI@vgqI%|9U}nR!43}NN`q7Q)hg56%QmaE4NHs6gTR`%L0o4^xv@=(Y*DJY&~3}- zyn-}?$y{NDJapD=q=YZm53;_qIlA{eT+}v;*-jxpp^MOi611U5jb@M(MWaS-pb}Ng zy3j?75>8Q?JFo~sePg^3Y@ThdNNHkN`z8xbG?-;zE3?^Zir-f5$LzZYGTa6BaR(Z@YML8^L_s-&A3IHOq^VI%(wfD1W@wz`JkaKE8fMxy=2r zbUoY1)$B;NKD7Ho94Zr2DA8audH>jd*;B8JVB!7?{u6oa#`xyObm_3h z?bPx{=Q0cL*@*dU18EA*Gri|SXWnw_$Y9Yk&+_By7tZH&a&=DE%1!M@v7^?f&5dBJ zI~hLdb-@;#B&>lEPsS_H8P`Wi|FUN6?6bt$=g%(Fj)nu<&-PCn=QAxyuGF6AY2n); z6gj)7Aa=eu8W;P5!30bMosXW6 zw&Qx#u~n0(Mv**XaZjfWh8pg)eUO$W>DxM(8;hDQ8rj&mjWjkU7s3YV;CD;IZ=Aj* zupYb9oUOO#&VveQ9@4{Nngausx0!G`Y?aQ)Y<5b7nW_#S0GFgGpWB$_c75;*@w~{P z`3#uS_9}^&FY^01;TNtfyafeTiv<9j7_FS(vE1DZ8T#Xo)lEbcTiZvXv>nSg;uAY3- zW=b`aT#uWTzd^2SF)@7~V-j@UYy03otVxyP?~Q64)M<09-l#{Hb(b z6ReI>s+lo)Tc$Pn6w7stI)!LR>a(34v*}E!RHWHr!{y^L7-*pw)z|_SgI41PifdGy z8Dv!@pIvYKVBxHk4d#`UQtMFuMXf~1(9n>Piz~@-$$6W={drp5a`Uiud3bP8l8lUu z*D4g46edkcN!dL(7<^xQvk%ucWP1NZ%<&-7*T;810iQyb;_oo}Q3VyajA+&DU?Pj@ z`0Q)~i|Mr6ihb+tkR`8O%0L{Qk%*cedK%f8k*ijkQMQNA8@8>iUNxE5N7kKFU*l7^EKw`+t?(bQjhb`Mj6m$ty#&f&vH*TH)&hT?oSf8lQK_4&$gU> zDcZe)X$eEUY0ng`snmGLpr?wvRCi~y_3viaI>E%rT|6Dko~wQ!ak=PPPSM@Pq9=r$r_)+D%w)GSH}ZsRt;n!@r$nEH_hYJu_o{-V z5680UyF?uA`(F*fm7Ui^*=**`Yd3)a&<30zWDyORWpcSU1-z+19Z26xZs$YAs#Roo zcz7|65$Yw=_qH?YHD<>5T!mnuCv8+?^sZc~q?c0|7#fn5C{Z6bolED~1FJ5_7Z)F& z6}!Pe=-y;MGk14(L`6hIb}>O9xSt_^Pwv0-eEESH01Pq&T#+Kjv?;!ulEAZ!O&FT3 z&LS^Smf7669amKu20D~=oK;(?xoT&?zXR_GKxh~H$gdE#eSzf9q{s<3kVu38qM@uV zEuGxIoNP29cz6s;so%ep=QxeD?J%Y*YXt|zMyNSl6AZ`koi6Pw7$wme-d&f)b0-;v zw?>?HA@#sk{mlnJGc4Gl<0vYh8yXuWL_p|bRkIqSmmv-y0G5QgKwhM|sdV@^N>`U; z&TM^Mt8Vv6BO9Cb*zOh&_zdv^G9a`+ehAq3pB=VEDioQE zHfzbu+^OwS6w8J|LxE!BXC-B%4HwSpuP70 z8E`?t*|TP}J1&6Dp!8P&4yG5Xz1K|15tI!A43QPOKDr>_Ow5wujd$6lL0$bU2=a2y zKc0>2xjK?QPU&;9+3rwofwyX zfnM&A6wKara^AJf%f?FEeJ5!oL?aWqPjcQs;CVqP%~vovHhWN!YuRWF*t{5_^L2`B zWNUVvPQS?xc}^T~CKa8DZ4t&#oUhG|et`eY$o~y$zvPP+BW?WozjHGux}JW6m$UO4 zk}h0QdEx`WKM+O}Av1tLVZH7gLtFqE5H$n&R{}D}l4q6EMBS;$e!; zLxk7gS;!eTrs#d~6>iBFK2qksq{R%AGyAu^_)jSkIMh+p)?S_(Cj^z6GGhbG@TZ-l 
zV_X=Z+y0Bz*&fCnll?70CsV|QBF&hwV%YHrVb>KEK`~-4=_Nu*7_v3?JIU;9TDzqg1zsT9An*+T zFwTD>pZ|LJf0(NNxE@RphGzJ0 zpBacI(y*#NeIM%(1mMq0j*^h%(dIrWIoOvvvxp4txV>Ua>l?7mv8@;fauD!{zuS@M z@o`5Bj)cY_cEB3`BOv}MYX5hKUZe#|^ARr1sil|I<8mq)xG0Dq8iuNb)a1d2`I-vLNNTc7u0R=GV7&#yAYD24#f{BNWC zuhQa0c?=5vM|5;sA~z@HC0?8{xfmjPWZXK1_8kqjr%#jdafc@}vol|3;Q>fjT%%`n z-Lz2`yE*t09v1E7B#Hm(T%)tMjs9!|%|La68 z+g=E)MJdV=1Zm8CVHOwB@!LH*qQ(14n)_b_a%+xmS?cdHMUEDSp!7G=1~FU7%Vn#Y z1u+-n!f9rQ{-z9oYyZXbRsyX+Ixq#BRWk5f)M>@d*Xs+jZpYt_^|KXO(Ws+dcno1l z2)xI8VzJ69KA+`ZvYg1&@qb~Nf6b`2560S?q)MWjRv1e}V1n)52%lercJ9=NXX3zSy?DSjdS?>yABs?XNw zyqUZu>bugc!fyt#kp3@86gr6UKOs9b`G@!68@qZUXFH2IQ6F3>fxPr;E?JYyW!&Xb zK{}N#1!7JDNYd!Te^cw8^vMO|s_8>Oysh|8H8gxGn~^^*bU(TNhjJfAFuJt`vc8p% z$?T#hyY|yZkx2cTTismG=b*kS=c^kyEbRxY@}0kzEmj)-M<(9}1R*`rrQ2y+ZDko- zU!xc;;`aQq8`(=Pg9w%Z?App8-WBckaH0rE#i7Q7{LgOsUwL0!)07Dga3d4dElQlQ zsOv9Ifp5sf!;xTr?al&zOD(cy|s0SuxeE61i{o$JjYIY<-2o@8!8(hypeqR&H4B3dT5}Gc7X!_Duss zef{OMI(3QdB2X5g)ow{vD!P19Q!dpID^VJlt2UjOoo#U(k8WZEfeaR9zqiMVwOYS_@6y%L19reeGwLr@gW#D$Ax*;T%Ty)3vSzm;I92 zDkGVr`t8(cg|_qZ!_zLjLzqJ1<|q8`U}R7c$CaM-`p?0R06 z_DI9`bQ(S}H5CICc@+9J@0LJqL;s`7{%3M8M(Q$q$H&bAI`}SIp#%c{i3v%6EEUl4 G{(k`BK;3o# literal 0 HcmV?d00001 diff --git a/tools/onnx-graphsurgeon/onnx_graphsurgeon/exporters/onnx_exporter.py b/tools/onnx-graphsurgeon/onnx_graphsurgeon/exporters/onnx_exporter.py index d7e3e138..1aa64f3e 100644 --- a/tools/onnx-graphsurgeon/onnx_graphsurgeon/exporters/onnx_exporter.py +++ b/tools/onnx-graphsurgeon/onnx_graphsurgeon/exporters/onnx_exporter.py @@ -82,13 +82,32 @@ def update_import_domains(graph): DEFAULT_CUSTOM_OPSET_VERSION = 1 for used_domain in all_used_domains: if used_domain not in current_domains: - graph.import_domains.append( - onnx.helper.make_opsetid(used_domain, DEFAULT_CUSTOM_OPSET_VERSION) - ) + graph.import_domains.append(onnx.helper.make_opsetid(used_domain, DEFAULT_CUSTOM_OPSET_VERSION)) current_domains.add(used_domain) return graph.import_domains +# Converts a fp32 gs.Constant to a bf16 
onnx.TensorProto +def tensor_to_onnx_bf16(tensor: Constant): + + # Converts the fp32 numpy array to bf16 values and store in a uint16 numpy array + def np_float32_to_bf16_as_uint16(arr): + new_arr = np.empty(arr.size, dtype=np.uint16) + flatten = arr.flatten() + for i in range(arr.size): + new_arr[i] = onnx.helper.float32_to_bfloat16(flatten[i]) + return new_arr.reshape(arr.shape) + + arr_bf16_as_uint16 = np_float32_to_bf16_as_uint16(tensor.values) + + onnx_tensor = onnx.TensorProto() + onnx_tensor.data_type = onnx.TensorProto.BFLOAT16 + onnx_tensor.dims.extend(arr_bf16_as_uint16.shape) + onnx_tensor.raw_data = arr_bf16_as_uint16.tobytes() + + return onnx_tensor + + class OnnxExporter(BaseExporter): @staticmethod def export_tensor_proto(tensor: Constant) -> onnx.TensorProto: @@ -97,7 +116,19 @@ def export_tensor_proto(tensor: Constant) -> onnx.TensorProto: if isinstance(tensor._values, LazyValues): onnx_tensor = tensor._values.tensor else: - onnx_tensor = onnx.numpy_helper.from_array(tensor.values) + if dtype_to_onnx(tensor.dtype) != dtype_to_onnx(tensor.export_dtype): + assert tensor.dtype == np.float32, ( + f"Cannot convert onnx dtype {dtype_to_onnx(tensor.dtype)} to {dtype_to_onnx(tensor.export_dtype)}." + "Only float32 to bfloat16 is supported" + ) + assert tensor.export_dtype == onnx.TensorProto.BFLOAT16, ( + f"Cannot convert onnx dtype {dtype_to_onnx(tensor.dtype)} to {dtype_to_onnx(tensor.export_dtype)}." 
+ "Only float32 to bfloat16 is supported" + ) + onnx_tensor = tensor_to_onnx_bf16(tensor) + else: + onnx_tensor = onnx.numpy_helper.from_array(tensor.values) + if tensor.data_location is not None: onnx_tensor.data_location = tensor.data_location onnx_tensor.name = tensor.name @@ -108,9 +139,7 @@ def export_sparse_tensor_proto(tensor: Constant) -> onnx.SparseTensorProto: return tensor._values.tensor @staticmethod - def export_value_info_proto( - tensor: Tensor, do_type_check: bool - ) -> onnx.ValueInfoProto: + def export_value_info_proto(tensor: Tensor, do_type_check: bool) -> onnx.ValueInfoProto: if do_type_check and tensor.dtype is None: G_LOGGER.critical( "Graph input and output tensors must include dtype information. Please set the dtype attribute for: {:}".format( @@ -120,9 +149,7 @@ def export_value_info_proto( if tensor.dtype is not None: if isinstance(tensor, Constant) or tensor.type == "tensor_type": - onnx_tensor = onnx.helper.make_tensor_value_info( - tensor.name, dtype_to_onnx(tensor.dtype), tensor.shape - ) + onnx_tensor = onnx.helper.make_tensor_value_info(tensor.name, dtype_to_onnx(tensor.dtype), tensor.shape) elif tensor.type == "sequence_type": onnx_tensor = onnx.helper.make_tensor_sequence_value_info( tensor.name, dtype_to_onnx(tensor.dtype), tensor.shape @@ -152,9 +179,7 @@ def export_attributes(attrs: dict) -> List[onnx.AttributeProto]: # Netron has a bug which makes it crash if a Tensor attribute has no tensor data. # So provide some meaningless tensor data for Netron to read. 
if val.type == Tensor: - tensor_proto = OnnxExporter.export_tensor_proto( - Constant("", np.array([0], dtype=np.float32)) - ) + tensor_proto = OnnxExporter.export_tensor_proto(Constant("", np.array([0], dtype=np.float32))) onnx_attr.t.CopyFrom(tensor_proto) onnx_attr.ref_attr_name = val.name @@ -198,9 +223,7 @@ def export_function(func: Function) -> onnx.FunctionProto: for tensor in func.tensors().values(): if isinstance(tensor, Constant): # Copying the tensor prevents the new node from appearing in the Constant tensor's inputs. - new_const_nodes.append( - Node("Constant", attrs={"value": tensor}, outputs=[tensor.copy()]) - ) + new_const_nodes.append(Node("Constant", attrs={"value": tensor}, outputs=[tensor.copy()])) # Const nodes have no inputs, so this maintains a topological ordering. func_nodes = new_const_nodes + func_nodes @@ -247,14 +270,8 @@ def export_graph(graph: Graph, do_type_check=True) -> onnx.GraphProto: """ check_duplicate_node_names(graph.nodes, level=G_LOGGER.WARNING) nodes = [OnnxExporter.export_node(node) for node in graph.nodes] - inputs = [ - OnnxExporter.export_value_info_proto(inp, do_type_check) - for inp in graph.inputs - ] - outputs = [ - OnnxExporter.export_value_info_proto(out, do_type_check) - for out in graph.outputs - ] + inputs = [OnnxExporter.export_value_info_proto(inp, do_type_check) for inp in graph.inputs] + outputs = [OnnxExporter.export_value_info_proto(out, do_type_check) for out in graph.outputs] tensor_map = graph.tensors() initializer = [ OnnxExporter.export_tensor_proto(tensor) @@ -275,9 +292,7 @@ def export_graph(graph: Graph, do_type_check=True) -> onnx.GraphProto: # Omit tensors from value_info if we don't know their shape/dtype def has_value_info(tensor): - return isinstance(tensor, Variable) and ( - tensor.dtype is not None or tensor.shape is not None - ) + return isinstance(tensor, Variable) and (tensor.dtype is not None or tensor.shape is not None) value_info = [ OnnxExporter.export_value_info_proto(tensor, 
do_type_check) diff --git a/tools/onnx-graphsurgeon/onnx_graphsurgeon/ir/tensor.py b/tools/onnx-graphsurgeon/onnx_graphsurgeon/ir/tensor.py index 71425a94..1e3502fb 100644 --- a/tools/onnx-graphsurgeon/onnx_graphsurgeon/ir/tensor.py +++ b/tools/onnx-graphsurgeon/onnx_graphsurgeon/ir/tensor.py @@ -61,7 +61,12 @@ def is_empty(self): """ return self.name == "" - def to_constant(self, values: np.ndarray, data_location: int = None): + def to_constant( + self, + values: np.ndarray, + data_location: int = None, + export_dtype: Union[np.dtype, "onnx.TensorProto.DataType"] = None, + ): """ Modifies this tensor in-place to convert it to a Constant. This means that all consumers/producers of the tensor will see the update. @@ -72,12 +77,15 @@ def to_constant(self, values: np.ndarray, data_location: int = None): An enum value indicating the location where the tensor data is stored. Generally, this will come from onnx.TensorProto.DataLocation. + dtype (Union[numpy.dtype, onnx.TensorProto.DataType]): The data type of the tensor. 
Returns: self """ self.__class__ = Constant self._values = values self.data_location = data_location + self.export_dtype = export_dtype + return self def to_variable( @@ -95,9 +103,13 @@ def to_variable( Returns: self """ + + variable_dtype = dtype if dtype is not None else self.export_dtype + self.__class__ = Variable - self.dtype = dtype self.shape = shape + self.dtype = variable_dtype + return self def i(self, tensor_idx=0, producer_idx=0): @@ -184,10 +196,11 @@ def __init__( self.shape = misc.default_value(shape, None) self.type = type - def to_constant(self, values: np.ndarray): + def to_constant(self, values: np.ndarray, export_dtype: Union[np.dtype, "onnx.TensorProto.DataType"] = None): del self.dtype del self.shape - return super().to_constant(values) + + return super().to_constant(values, export_dtype=export_dtype) def copy(self): """ @@ -315,6 +328,7 @@ def __init__( name: str, values: Union[np.ndarray, LazyValues], data_location: int = None, + export_dtype: Union[np.dtype, "onnx.TensorProto.DataType"] = None, ): """ Represents a Tensor whose value is known. @@ -326,6 +340,11 @@ def __init__( data_location (int): An enum value indicating the location where the tensor data is stored. Generally, this will come from onnx.TensorProto.DataLocation. + + + export_dtype (Union[np.dtype, onnx.TensorProto.DataType]): + The data type of the tensor when exported to onnx. If not specified, then + the data type of values will be used. 
""" self.name = name self.inputs = misc.SynchronizedList(self, field_name="outputs", initial=[]) @@ -344,12 +363,18 @@ def __init__( ) self._values = values self.data_location = data_location + self._export_dtype = export_dtype - def to_variable( - self, dtype: np.dtype = None, shape: Sequence[Union[int, str]] = [] - ): + def to_variable(self, dtype: np.dtype = None, shape: Sequence[Union[int, str]] = []): + var_dtype = self.export_dtype + + del self._export_dtype del self._values - return super().to_variable(dtype, shape) + + if dtype is not None: + return super().to_variable(dtype, shape) + + return super().to_variable(var_dtype, shape) def copy(self): """ @@ -357,7 +382,7 @@ def copy(self): Note: Generally, you should only ever make a copy of a Graph. """ - return Constant(self.name, self._values) + return Constant(self.name, self._values, export_dtype=self.export_dtype) @property def values(self): @@ -378,6 +403,17 @@ def shape(self): def dtype(self): return self._values.dtype + @property + def export_dtype(self): + if self._export_dtype is not None: + return self._export_dtype + + return self.dtype + + @export_dtype.setter + def export_dtype(self, export_dtype): + self._export_dtype = export_dtype + def __repr__(self): # Hack to make logging output pretty. ret = self.__str__() ret += "\n{:}".format(self._values) diff --git a/tools/onnx-graphsurgeon/tests/test_examples.py b/tools/onnx-graphsurgeon/tests/test_examples.py index 64a28558..fd86fbb0 100644 --- a/tools/onnx-graphsurgeon/tests/test_examples.py +++ b/tools/onnx-graphsurgeon/tests/test_examples.py @@ -51,6 +51,9 @@ def __init__(self, name, infer=True): ("09_shape_operations_with_the_layer_api", [Artifact("model.onnx")]), ("10_dynamic_batch_size", [Artifact("model.onnx"), Artifact("dynamic.onnx")]), ("11_creating_a_local_function", [Artifact("model.onnx")]), + + # Skipping inference test as bf16 is not supported in ORT yet. 
+ ("12_using_bf16", [Artifact("test_conv_bf16.onnx", infer=False)]), ] diff --git a/tools/onnx-graphsurgeon/tests/test_ir.py b/tools/onnx-graphsurgeon/tests/test_ir.py index 15f650f0..054c3514 100644 --- a/tools/onnx-graphsurgeon/tests/test_ir.py +++ b/tools/onnx-graphsurgeon/tests/test_ir.py @@ -33,13 +33,16 @@ class TensorBaseTests(object): def test_can_convert_in_place_to_constant(self): - tensor = self.tensor.to_constant(values=np.ones((1, 3, 5, 5), dtype=np.float64)) + tensor = self.tensor.to_constant( + values=np.ones((1, 3, 5, 5), dtype=np.float64), export_dtype=onnx.TensorProto.BFLOAT16 + ) assert tensor is self.tensor assert isinstance(tensor, Constant) assert isinstance(self.input_node.outputs[0], Constant) assert isinstance(self.output_node.inputs[0], Constant) assert tensor.shape == (1, 3, 5, 5) assert tensor.dtype == np.float64 + assert tensor.export_dtype == onnx.TensorProto.BFLOAT16 assert np.all(self.input_node.outputs[0].values == tensor.values) assert np.all(self.output_node.inputs[0].values == tensor.values) @@ -136,7 +139,7 @@ def test_equals_name_mismatch(self): class TestConstant(TensorBaseTests): def setup_method(self): self.tensor = Constant( - name="test_tensor", values=np.ones((1, 3, 5, 5), dtype=np.float64) + name="test_tensor", values=np.ones((1, 3, 5, 5), dtype=np.float64), export_dtype=onnx.TensorProto.BFLOAT16 ) self.input_node = Node( op="Add", outputs=[self.tensor] @@ -149,6 +152,9 @@ def test_can_get_shape(self): def test_can_get_dtype(self): assert self.tensor.dtype == np.float64 + def test_can_get_export_dtype(self): + assert self.tensor.export_dtype == onnx.TensorProto.BFLOAT16 + @pytest.fixture def node_with_nested_subgraphs():