diff --git a/.buildinfo b/.buildinfo
index f8b07ad..9a3294a 100644
--- a/.buildinfo
+++ b/.buildinfo
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 89ada319c94fcb1610b7f80d777e8b12
+config: 0ea2334c76c1e774d577e20446a79224
 tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/.doctrees/deployment/1_tensorrt_llm_deployment.doctree b/.doctrees/deployment/1_tensorrt_llm_deployment.doctree
index ecf83fb..5827105 100644
Binary files a/.doctrees/deployment/1_tensorrt_llm_deployment.doctree and b/.doctrees/deployment/1_tensorrt_llm_deployment.doctree differ
diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle
index b4a8fc1..eec7be3 100644
Binary files a/.doctrees/environment.pickle and b/.doctrees/environment.pickle differ
diff --git a/.doctrees/examples/0_all_examples.doctree b/.doctrees/examples/0_all_examples.doctree
index 154cbaa..5b66533 100644
Binary files a/.doctrees/examples/0_all_examples.doctree and b/.doctrees/examples/0_all_examples.doctree differ
diff --git a/.doctrees/getting_started/1_overview.doctree b/.doctrees/getting_started/1_overview.doctree
index 7be2a67..1a32aa0 100644
Binary files a/.doctrees/getting_started/1_overview.doctree and b/.doctrees/getting_started/1_overview.doctree differ
diff --git a/.doctrees/getting_started/2_installation.doctree b/.doctrees/getting_started/2_installation.doctree
index 741334c..77d5d11 100644
Binary files a/.doctrees/getting_started/2_installation.doctree and b/.doctrees/getting_started/2_installation.doctree differ
diff --git a/.doctrees/getting_started/3_quantization.doctree b/.doctrees/getting_started/3_quantization.doctree
index 0ce73de..ee2bfd1 100644
Binary files a/.doctrees/getting_started/3_quantization.doctree and b/.doctrees/getting_started/3_quantization.doctree differ
diff --git a/.doctrees/getting_started/5_distillation.doctree b/.doctrees/getting_started/5_distillation.doctree
new file mode 100644
index 0000000..9e485dd
Binary files /dev/null and b/.doctrees/getting_started/5_distillation.doctree differ
diff --git a/.doctrees/getting_started/6_sparsity.doctree b/.doctrees/getting_started/6_sparsity.doctree
index 60fb311..a9779ad 100644
Binary files a/.doctrees/getting_started/6_sparsity.doctree and b/.doctrees/getting_started/6_sparsity.doctree differ
diff --git a/.doctrees/guides/1_quantization.doctree b/.doctrees/guides/1_quantization.doctree
index 43906e9..bcc2ba5 100644
Binary files a/.doctrees/guides/1_quantization.doctree and b/.doctrees/guides/1_quantization.doctree differ
diff --git a/.doctrees/guides/4_distillation.doctree b/.doctrees/guides/4_distillation.doctree
new file mode 100644
index 0000000..4e2c5e0
Binary files /dev/null and b/.doctrees/guides/4_distillation.doctree differ
diff --git a/.doctrees/guides/5_sparsity.doctree b/.doctrees/guides/5_sparsity.doctree
index a1e8396..e2418be 100644
Binary files a/.doctrees/guides/5_sparsity.doctree and b/.doctrees/guides/5_sparsity.doctree differ
diff --git a/.doctrees/guides/_basic_quantization.doctree b/.doctrees/guides/_basic_quantization.doctree
index 82c5be6..139d290 100644
Binary files a/.doctrees/guides/_basic_quantization.doctree and b/.doctrees/guides/_basic_quantization.doctree differ
diff --git a/.doctrees/guides/_onnx_quantization.doctree b/.doctrees/guides/_onnx_quantization.doctree
index 980db0b..66299e4 100644
Binary files a/.doctrees/guides/_onnx_quantization.doctree and b/.doctrees/guides/_onnx_quantization.doctree differ
diff --git a/.doctrees/guides/_pytorch_quantization.doctree b/.doctrees/guides/_pytorch_quantization.doctree
index 44f7511..fdd597d 100644
Binary files a/.doctrees/guides/_pytorch_quantization.doctree and b/.doctrees/guides/_pytorch_quantization.doctree differ
diff --git a/.doctrees/index.doctree b/.doctrees/index.doctree
index 5b3f64d..4bfcada 100644
Binary files a/.doctrees/index.doctree and b/.doctrees/index.doctree differ
diff --git a/.doctrees/reference/0_versions.doctree b/.doctrees/reference/0_versions.doctree
index 993786f..136aec9 100644
Binary files a/.doctrees/reference/0_versions.doctree and b/.doctrees/reference/0_versions.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.deploy.doctree b/.doctrees/reference/generated/modelopt.deploy.doctree
index b460ffd..155423f 100644
Binary files a/.doctrees/reference/generated/modelopt.deploy.doctree and b/.doctrees/reference/generated/modelopt.deploy.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.deploy.llm.doctree b/.doctrees/reference/generated/modelopt.deploy.llm.doctree
index 2a1f1ad..ac37a3e 100644
Binary files a/.doctrees/reference/generated/modelopt.deploy.llm.doctree and b/.doctrees/reference/generated/modelopt.deploy.llm.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.deploy.llm.generate.doctree b/.doctrees/reference/generated/modelopt.deploy.llm.generate.doctree
index cd4ad15..143722f 100644
Binary files a/.doctrees/reference/generated/modelopt.deploy.llm.generate.doctree and b/.doctrees/reference/generated/modelopt.deploy.llm.generate.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.deploy.llm.model_config_trt.doctree b/.doctrees/reference/generated/modelopt.deploy.llm.model_config_trt.doctree
deleted file mode 100644
index 58dc3bc..0000000
Binary files a/.doctrees/reference/generated/modelopt.deploy.llm.model_config_trt.doctree and /dev/null differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.op_types.doctree b/.doctrees/reference/generated/modelopt.onnx.op_types.doctree
index 53c54ea..ad61934 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.op_types.doctree and b/.doctrees/reference/generated/modelopt.onnx.op_types.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.calib_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.calib_utils.doctree
index 39b0c32..7c3e4d4 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.calib_utils.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.calib_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.doctree
index aec17b1..8257731 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.extensions.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.extensions.doctree
new file mode 100644
index 0000000..a5d0036
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.extensions.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.fp8.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.fp8.doctree
new file mode 100644
index 0000000..b4c899d
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.fp8.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.graph_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.graph_utils.doctree
index 09e9d7b..dd46ca2 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.graph_utils.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.graph_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.int4.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.int4.doctree
index dccb0be..9c2be49 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.int4.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.int4.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.int8.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.int8.doctree
new file mode 100644
index 0000000..4d41b20
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.int8.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.ort_patching.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.ort_patching.doctree
index b3eaa47..40afe21 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.ort_patching.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.ort_patching.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.ort_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.ort_utils.doctree
index c76eb5f..9fa8ea3 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.ort_utils.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.ort_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.qdq_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.qdq_utils.doctree
index 04c744c..27d8839 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.qdq_utils.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.qdq_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.quant_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.quant_utils.doctree
index 3d2b1ca..ccd3767 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.quant_utils.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.quant_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.quantize.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.quantize.doctree
index da5a0ee..eb5de3e 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.quantize.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.quantize.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.utils.doctree b/.doctrees/reference/generated/modelopt.onnx.utils.doctree
index 5339a16..8716609 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.utils.doctree and b/.doctrees/reference/generated/modelopt.onnx.utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.config.doctree b/.doctrees/reference/generated/modelopt.torch.distill.config.doctree
new file mode 100644
index 0000000..48b2bb9
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.config.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.distillation.doctree b/.doctrees/reference/generated/modelopt.torch.distill.distillation.doctree
new file mode 100644
index 0000000..89fe63f
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.distillation.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.distillation_model.doctree b/.doctrees/reference/generated/modelopt.torch.distill.distillation_model.doctree
new file mode 100644
index 0000000..d7c7bd8
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.distillation_model.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.doctree b/.doctrees/reference/generated/modelopt.torch.distill.doctree
new file mode 100644
index 0000000..a210068
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.loss_balancers.doctree b/.doctrees/reference/generated/modelopt.torch.distill.loss_balancers.doctree
new file mode 100644
index 0000000..057544c
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.loss_balancers.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.losses.doctree b/.doctrees/reference/generated/modelopt.torch.distill.losses.doctree
new file mode 100644
index 0000000..21c0f1a
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.losses.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.mode.doctree b/.doctrees/reference/generated/modelopt.torch.distill.mode.doctree
new file mode 100644
index 0000000..3c9a400
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.mode.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.registry.doctree b/.doctrees/reference/generated/modelopt.torch.distill.registry.doctree
new file mode 100644
index 0000000..e6f561b
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.registry.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.doctree b/.doctrees/reference/generated/modelopt.torch.doctree
index cd4233c..3358933 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.doctree and b/.doctrees/reference/generated/modelopt.torch.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.distribute.doctree b/.doctrees/reference/generated/modelopt.torch.export.distribute.doctree
index 97c6425..7a6222e 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.distribute.doctree and b/.doctrees/reference/generated/modelopt.torch.export.distribute.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.doctree b/.doctrees/reference/generated/modelopt.torch.export.doctree
index 1a4d247..78a408e 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.doctree and b/.doctrees/reference/generated/modelopt.torch.export.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.hf_config_map.doctree b/.doctrees/reference/generated/modelopt.torch.export.hf_config_map.doctree
new file mode 100644
index 0000000..fdc4497
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.hf_config_map.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.layer_utils.doctree b/.doctrees/reference/generated/modelopt.torch.export.layer_utils.doctree
index 19ca4d8..d089388 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.layer_utils.doctree and b/.doctrees/reference/generated/modelopt.torch.export.layer_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.model_config.doctree b/.doctrees/reference/generated/modelopt.torch.export.model_config.doctree
index d451e1e..5907cb4 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.model_config.doctree and b/.doctrees/reference/generated/modelopt.torch.export.model_config.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.model_config_export.doctree b/.doctrees/reference/generated/modelopt.torch.export.model_config_export.doctree
index 1040ff5..ea03b09 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.model_config_export.doctree and b/.doctrees/reference/generated/modelopt.torch.export.model_config_export.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.scaling_factor_utils.doctree b/.doctrees/reference/generated/modelopt.torch.export.scaling_factor_utils.doctree
index 01d94e4..058e26a 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.scaling_factor_utils.doctree and b/.doctrees/reference/generated/modelopt.torch.export.scaling_factor_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.tensorrt_llm_utils.doctree b/.doctrees/reference/generated/modelopt.torch.export.tensorrt_llm_utils.doctree
index 8741444..0fb5f9b 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.tensorrt_llm_utils.doctree and b/.doctrees/reference/generated/modelopt.torch.export.tensorrt_llm_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.vllm.doctree b/.doctrees/reference/generated/modelopt.torch.export.vllm.doctree
new file mode 100644
index 0000000..ece0f83
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.vllm.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.opt.hparam.doctree b/.doctrees/reference/generated/modelopt.torch.opt.hparam.doctree
index d727c92..2fd48bf 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.opt.hparam.doctree and b/.doctrees/reference/generated/modelopt.torch.opt.hparam.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.opt.searcher.doctree b/.doctrees/reference/generated/modelopt.torch.opt.searcher.doctree
index 7a5f751..433575f 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.opt.searcher.doctree and b/.doctrees/reference/generated/modelopt.torch.opt.searcher.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.opt.utils.doctree b/.doctrees/reference/generated/modelopt.torch.opt.utils.doctree
index 0bc4c08..378f6c9 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.opt.utils.doctree and b/.doctrees/reference/generated/modelopt.torch.opt.utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.algorithms.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.algorithms.doctree
new file mode 100644
index 0000000..79d555d
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.algorithms.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.calib.histogram.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.calib.histogram.doctree
index a7c0fb0..6c7c148 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.calib.histogram.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.calib.histogram.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.calib.max.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.calib.max.doctree
index 5f8b221..50dcfa7 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.calib.max.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.calib.max.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.config.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.config.doctree
index e7633c0..c4e24bf 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.config.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.config.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.conversion.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.conversion.doctree
index 72be1dc..8a99ff4 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.conversion.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.conversion.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.doctree
index cd84875..ccb4b00 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.extensions.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.extensions.doctree
index a673f09..f35fd1d 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.extensions.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.extensions.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.model_calib.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.model_calib.doctree
index c6ac50f..3aa188a 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.model_calib.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.model_calib.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.model_quant.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.model_quant.doctree
index 6879fb5..a42169c 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.model_quant.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.model_quant.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.doctree
index 448084b..6162689 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.doctree
index 9be4758..98a1573 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.doctree
index f8c99c2..6a35ff8 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.doctree
index 0f9f93c..9ef9894 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_rnn.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_rnn.doctree
new file mode 100644
index 0000000..29b37d9
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_rnn.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.doctree
index a452e17..892cf45 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.plugins.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.plugins.doctree
index b1b8bc7..4f1cfda 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.plugins.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.plugins.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.base_qtensor.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.base_qtensor.doctree
new file mode 100644
index 0000000..584f36d
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.base_qtensor.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.doctree
new file mode 100644
index 0000000..dc579f6
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.int4_tensor.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.int4_tensor.doctree
new file mode 100644
index 0000000..faf950d
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.int4_tensor.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.nf4_tensor.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.nf4_tensor.doctree
new file mode 100644
index 0000000..ec1b13c
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.nf4_tensor.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.tensor_quant.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.tensor_quant.doctree
index 0ab87a9..650d31f 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.tensor_quant.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.tensor_quant.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.utils.dataset_utils.doctree b/.doctrees/reference/generated/modelopt.torch.utils.dataset_utils.doctree
index 7063a1f..ab7abc2 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.utils.dataset_utils.doctree and b/.doctrees/reference/generated/modelopt.torch.utils.dataset_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.utils.distributed.doctree b/.doctrees/reference/generated/modelopt.torch.utils.distributed.doctree
index adb542c..2f8b8f1 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.utils.distributed.doctree and b/.doctrees/reference/generated/modelopt.torch.utils.distributed.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.utils.network.doctree b/.doctrees/reference/generated/modelopt.torch.utils.network.doctree
index 0ec5a03..8464a86 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.utils.network.doctree and b/.doctrees/reference/generated/modelopt.torch.utils.network.doctree differ
diff --git a/.doctrees/support/1_contact.doctree b/.doctrees/support/1_contact.doctree
index 5572c37..f4796cd 100644
Binary files a/.doctrees/support/1_contact.doctree and b/.doctrees/support/1_contact.doctree differ
diff --git a/.doctrees/support/2_faqs.doctree b/.doctrees/support/2_faqs.doctree
index 34b2b30..cfebc1c 100644
Binary files a/.doctrees/support/2_faqs.doctree and b/.doctrees/support/2_faqs.doctree differ
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..5442ede
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+.doctrees/environment.pickle filter=lfs diff=lfs merge=lfs -text
diff --git a/_sources/deployment/1_tensorrt_llm_deployment.rst.txt b/_sources/deployment/1_tensorrt_llm_deployment.rst.txt
index ea32741..3b04d0d 100644
--- a/_sources/deployment/1_tensorrt_llm_deployment.rst.txt
+++ b/_sources/deployment/1_tensorrt_llm_deployment.rst.txt
@@ -90,50 +90,55 @@ If the :meth:`export_tensorrt_llm_checkpoint `_
-for all ModelOpt examples.
+All examples can be accessed from the ModelOpt GitHub repository at
+`github.com/NVIDIA/TensorRT-Model-Optimizer `_.
diff --git a/_sources/getting_started/1_overview.rst.txt b/_sources/getting_started/1_overview.rst.txt
index b39fc37..558d512 100644
--- a/_sources/getting_started/1_overview.rst.txt
+++ b/_sources/getting_started/1_overview.rst.txt
@@ -7,8 +7,8 @@ Overview
 Minimizing inference costs presents a significant challenge as generative AI models continue to grow in complexity and size.
 The `NVIDIA TensorRT Model Optimizer `_ (referred to as Model Optimizer, or ModelOpt) is a library comprising
 state-of-the-art model optimization techniques including quantization and sparsity to compress model.
-It accepts a torch or ONNX model as inputs and provides Python APIs for users to easily stack different model optimization
-techniques to produce quantized checkpoint. Seamlessly integrated within the NVIDIA AI software ecosystem, the quantized
+It accepts a torch or ONNX model as input and provides Python APIs for users to easily stack different model optimization
+techniques to produce optimized & quantized checkpoints. Seamlessly integrated within the NVIDIA AI software ecosystem, the quantized
 checkpoint generated from Model Optimizer is ready for deployment in downstream inference frameworks like
 `TensorRT-LLM `_ or `TensorRT `_.
 Further integrations are planned for `NVIDIA NeMo `_ and `Megatron-LM `_
@@ -16,7 +16,7 @@ for training-in-the-loop optimization techniques. For enterprise users, the 8-bi
 `NVIDIA NIM `_.
 
 Model Optimizer is available for free for all developers on `NVIDIA PyPI `_.
-Visit `/NVIDIA/TensorRT-Model-Optimizer repository `_ for end-to-end
+Visit the `TensorRT Model Optimizer GitHub repository `_ for end-to-end
 example scripts and recipes optimized for NVIDIA GPUs.
 
 Techniques
@@ -34,8 +34,11 @@ for list of formats supported.
 Sparsity
 ^^^^^^^^
 Sparsity is a technique to further reduce the memory footprint of deep learning models and accelerate the inference.
-Model Optimizer provides Python API :meth:`mts.sparsify() ` to apply
-weight sparsity to a given model. The ``mts.sparsify()`` API supports `NVIDIA 2:4 `_
-sparsity pattern and various sparsification methods, such as NVIDIA `ASP `_
-and `SparseGPT `_. It supports both post-training sparsity and sparsity with fine-tuning.
-The latter workflow is recommended to minimize accuracy degradation.
+Model Optimizer provides the Python API :meth:`mts.sparsify() ` to
+automatically apply weight sparsity to a given model. The
+:meth:`mts.sparsify() ` API supports
+`NVIDIA 2:4 `_ sparsity pattern and various sparsification methods,
+such as `NVIDIA ASP `_ and
+`SparseGPT `_. It supports both post-training sparsity (PTS) and
+sparsity-aware training (SAT). The latter workflow is recommended to minimize accuracy
+degradation.
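+
+The two techniques can be stacked through the same APIs. A minimal sketch (the config name,
+calibration loader, and ordering here are illustrative only; see the technique guides for real
+recipes):
+
+.. code-block:: python
+
+    import modelopt.torch.quantization as mtq
+    import modelopt.torch.sparsity as mts
+
+    # Sparsify first; SparseGPT is data-driven and needs calibration data.
+    model = mts.sparsify(
+        model,
+        mode="sparsegpt",
+        config={"data_loader": calib_loader, "collect_func": lambda x: x},
+    )
+
+
+    # Then quantize with a built-in config; forward_loop feeds calibration batches.
+    def forward_loop(model):
+        for batch in calib_loader:
+            model(batch)
+
+
+    model = mtq.quantize(model, mtq.INT8_SMOOTHQUANT_CFG, forward_loop)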
diff --git a/_sources/getting_started/2_installation.rst.txt b/_sources/getting_started/2_installation.rst.txt
index f015882..35c70ec 100644
--- a/_sources/getting_started/2_installation.rst.txt
+++ b/_sources/getting_started/2_installation.rst.txt
@@ -7,17 +7,19 @@ System requirements
 Model Optimizer (``nvidia-modelopt``) currently has the following system requirements:
 
-+----------------------+-----------------------------+
-| OS                   | Linux, Windows              |
-+----------------------+-----------------------------+
-| Architecture         | x86_64, aarch64, win_amd64  |
-+----------------------+-----------------------------+
-| Python               | >=3.8,<3.12                 |
-+----------------------+-----------------------------+
-| PyTorch              | >=1.11                      |
-+----------------------+-----------------------------+
-| CUDA                 | >=11.8 (Recommended)        |
-+----------------------+-----------------------------+
++-------------------------+-----------------------------+
+| OS                      | Linux                       |
++-------------------------+-----------------------------+
+| Architecture            | x86_64                      |
++-------------------------+-----------------------------+
+| Python                  | >=3.8,<3.13                 |
++-------------------------+-----------------------------+
+| CUDA                    | >=11.8 (Recommended)        |
++-------------------------+-----------------------------+
+| PyTorch (Optional)      | >=1.11                      |
++-------------------------+-----------------------------+
+| TensorRT-LLM (Optional) | 0.11                        |
++-------------------------+-----------------------------+
 
 Install Model Optimizer
 =======================
@@ -34,11 +36,11 @@ license terms of ModelOpt and any dependencies before use.
 
    **Setting up a virtual environment**
 
   We recommend setting up a virtual environment if you don't have one already. Run the following
-   command to set up and activate a ``conda`` virtual environment named ``modelopt`` with Python 3.11:
+   command to set up and activate a ``conda`` virtual environment named ``modelopt`` with Python 3.12:
 
    .. code-block:: bash
 
-      conda create -n modelopt python=3.11 pip
+      conda create -n modelopt python=3.12 pip
 
    .. code-block:: bash
 
@@ -89,11 +91,14 @@ license terms of ModelOpt and any dependencies before use.
      * - ``transformers`` (Huggingface)
        - ``[hf]``
 
+   If you want to install only partial dependencies, please replace ``[all]`` with the desired
+   optional dependencies for the below ``pip`` installation command.
+
    **Install Model Optimizer** (``nvidia-modelopt``)
 
    .. code-block:: bash
 
-      pip install "nvidia-modelopt[all]" --no-cache-dir --extra-index-url https://pypi.nvidia.com
+      pip install "nvidia-modelopt[all]" --extra-index-url https://pypi.nvidia.com
 
 Check installation
 ==================
 
@@ -103,7 +108,7 @@ Check installation
    When you use ModelOpt's PyTorch quantization APIs for the first time, it will compile the fast
    quantization kernels using your installed torch and CUDA if available.
    This may take a few minutes but subsequent quantization calls will be much faster.
-   To invoke the compilation now and check if it is successful, run the following command:
+   To invoke the compilation and check if it is successful or pre-compile for docker builds, run the following command:
 
    .. code-block:: bash
diff --git a/_sources/getting_started/3_quantization.rst.txt b/_sources/getting_started/3_quantization.rst.txt
index 693a8b2..b128e78 100644
--- a/_sources/getting_started/3_quantization.rst.txt
+++ b/_sources/getting_started/3_quantization.rst.txt
@@ -9,8 +9,8 @@ Quantization is an effective technique to reduce the memory footprint of deep learning models and
 accelerate the inference speed.
 
 ModelOpt's :meth:`mtq.quantize() ` API enables
-users to quantize a model with advanced algorithms like SmoothQuant, AWQ etc. ModelOpt supports both
-Post Training Quantization (PTQ) and Quantization Aware Training (QAT).
+users to quantize a model with advanced algorithms like SmoothQuant, AWQ, and more. ModelOpt
+supports both Post Training Quantization (PTQ) and Quantization Aware Training (QAT).
 
 .. tip::
 
@@ -21,7 +21,7 @@ PTQ for PyTorch models
 -----------------------------
 
 :meth:`mtq.quantize ` requires the model,
-the appropriate quantization configuration and a forward loop as inputs. Here is a quick example of
+the appropriate quantization configuration, and a forward loop as inputs. Here is a quick example of
 quantizing a model with int8 SmoothQuant using
 :meth:`mtq.quantize `:
 
@@ -55,8 +55,8 @@ Deployment
 
 The quantized model is just like a regular Pytorch model and is ready for evaluation or deployment.
 Huggingface or Nemo LLM models can be exported to TensorRT-LLM using ModelOpt.
-Please see :doc:`TensorRT-LLM Deployment <../deployment/1_tensorrt_llm_deployment>` guide for more
-details.
+Please see the :doc:`TensorRT-LLM Deployment <../deployment/1_tensorrt_llm_deployment>` guide for
+more details.
 
 The model can be also exported to ONNX using
 `torch.onnx.export `_.
diff --git a/_sources/getting_started/5_distillation.rst.txt b/_sources/getting_started/5_distillation.rst.txt
new file mode 100644
index 0000000..8950cdc
--- /dev/null
+++ b/_sources/getting_started/5_distillation.rst.txt
@@ -0,0 +1,115 @@
+
+=========================
+Quick Start: Distillation
+=========================
+
+ModelOpt's :doc:`Distillation <../guides/4_distillation>` is a set of wrappers and utilities
+to easily perform Knowledge Distillation among teacher and student models.
+Given a pretrained teacher model, Distillation has the potential to train a smaller student model
+faster and/or with higher accuracy than the student model could achieve on its own.
+
+This quick-start guide shows the necessary steps to integrate Distillation into your
+training pipeline.
+
+Set up your base models
+-----------------------
+
+First obtain both a pretrained model to act as the teacher and a (usually smaller) model to serve
+as the student.
+
+.. code-block:: python
+
+    from torchvision.models import resnet50, resnet18
+
+    # Define student
+    student_model = resnet18()
+
+
+    # Define callable which returns teacher
+    def teacher_factory():
+        teacher_model = resnet50()
+        teacher_model.load_state_dict(pretrained_weights)
+        return teacher_model
+
+
+Set up the meta model
+---------------------
+
+As Knowledge Distillation involves (at least) two models, ModelOpt simplifies the integration
+process by wrapping both student and teacher into one meta model.
+
+Please see an example Distillation setup below. This example assumes the outputs
+of ``teacher_model`` and ``student_model`` are logits.
+
+.. code-block:: python
+
+    import modelopt.torch.distill as mtd
+
+    distillation_config = {
+        "teacher_model": teacher_factory,  # model initializer
+        "criterion": mtd.LogitsDistillationLoss(),  # callable receiving student and teacher outputs, in order
+        "loss_balancer": mtd.StaticLossBalancer(),  # combines multiple losses; omit if only one distillation loss used
+    }
+
+    distillation_model = mtd.convert(student_model, mode=[("kd_loss", distillation_config)])
+
+The ``teacher_model`` can be either a callable which returns an ``nn.Module`` or a tuple of ``(model_cls, args, kwargs)``.
+The ``criterion`` is the distillation loss used between student and teacher tensors.
+The ``loss_balancer`` determines how the original and distillation losses are combined (if needed).
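+
+For instance, the factory defined above could equivalently be supplied in tuple form (shown only
+to illustrate the accepted shapes; the values are the ones from the earlier snippet):
+
+.. code-block:: python
+
+    # (model_cls_or_callable, args, kwargs)
+    distillation_config["teacher_model"] = (teacher_factory, (), {})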
+
+See :doc:`Distillation <../guides/4_distillation>` for more info.
+
+
+Distill during training
+-----------------------
+
+To distill from teacher to student, simply use the meta model in the usual training loop, while
+also using the meta model's ``.compute_kd_loss()`` method to compute the distillation loss, in addition to
+the original user loss.
+
+An example of Distillation training is given below:
+
+.. code-block:: python
+    :emphasize-lines: 14
+
+    # Setup the data loaders. As example:
+    train_loader = get_train_loader()
+
+    # Define user loss function. As example:
+    loss_fn = get_user_loss_fn()
+
+    for input, labels in train_loader:
+        distillation_model.zero_grad()
+        # Forward through the wrapped models
+        out = distillation_model(input)
+        # Same loss as originally present
+        loss = loss_fn(out, labels)
+        # Combine distillation and user losses
+        loss_total = distillation_model.compute_kd_loss(student_loss=loss)
+        loss_total.backward()
+
+
+.. note::
+    `DataParallel `_ may
+    break ModelOpt's Distillation feature.
+    Note that `HuggingFace Trainer `_
+    uses DataParallel by default.
+
+
+Export trained model
+--------------------
+
+The model can easily be reverted to its original class for further use (i.e., deployment)
+without any ModelOpt modifications attached.
+
+.. code-block:: python
+
+    model = mtd.export(distillation_model)
+
+
+--------------------------------
+
+**Next steps**
+    * Learn more about :doc:`Distillation <../guides/4_distillation>`.
+    * See ModelOpt's :doc:`API documentation <../reference/1_modelopt_api>` for detailed
+      functionality and usage information.
diff --git a/_sources/getting_started/6_sparsity.rst.txt b/_sources/getting_started/6_sparsity.rst.txt
index 72881e1..668533d 100644
--- a/_sources/getting_started/6_sparsity.rst.txt
+++ b/_sources/getting_started/6_sparsity.rst.txt
@@ -6,13 +6,13 @@ Sparsity
 --------
 
 ModelOpt's :doc:`sparsity<../guides/5_sparsity>` feature is an effective technique to reduce the
-memory footprint of deep learning models and accelerate the inference speed. ModelOpt provides an
+memory footprint of deep learning models and accelerate the inference speed. ModelOpt provides the
 easy-to-use API :meth:`mts.sparsify() ` to apply
 weight sparsity to a given model.
 :meth:`mts.sparsify() ` supports
 `NVIDIA 2:4 Sparsity `_ sparsity pattern and various sparsification
-methods, such as (`NVIDIA ASP `_)
-and (`SparseGPT `_).
+methods, such as `NVIDIA ASP `_
+and `SparseGPT `_.
 
 This guide provides a quick start to apply weight sparsity to a PyTorch model using ModelOpt.
 
@@ -38,7 +38,7 @@ Here is a quick example of sparsifying a model to 2:4 sparsity pattern with Spar
     sparsity_config = {"data_loader": data_loader, "collect_func": lambda x: x}
 
     # Sparsify the model and perform calibration (PTS)
-    model = mts.sparsity(model, mode="sparsegpt", config=sparsity_config)
+    model = mts.sparsify(model, mode="sparsegpt", config=sparsity_config)
 
 .. note::
     `data_loader` is only required in case of data-driven sparsity, e.g., SparseGPT for calibration.
@@ -48,10 +48,19 @@ Here is a quick example of sparsifying a model to 2:4 sparsity pattern with Spar
     `data_loader` and `collect_func` can be substituted with a `forward_loop` that iterates the
     model through the calibration dataset.
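+
+For illustration, the ``forward_loop`` variant mentioned in the note could look like the
+following sketch (the dataloader here is a placeholder):
+
+.. code-block:: python
+
+    def forward_loop(model):
+        for batch in data_loader:
+            model(batch)
+
+
+    # Calibrate via the forward loop instead of data_loader + collect_func
+    model = mts.sparsify(model, mode="sparsegpt", config={"forward_loop": forward_loop})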
 
+Sparsity-aware Training (SAT) for PyTorch models
+------------------------------------------------
+
+After sparsifying the model, you can save a checkpoint for the sparsified model and use it for
+fine-tuning. Check out the
+`GitHub end-to-end example `_
+to learn more about SAT.
+
+
 --------------------------------
 
 **Next Steps**
     * Learn more about sparsity and advanced usage of ModelOpt sparsity in
       :doc:`Sparsity guide <../guides/5_sparsity>`.
-    * Checkout out the end-to-end examples on GitHub for PTQ and QAT
-      `here `_.
+    * Check out the `end-to-end example on GitHub `_
+      for PTS and SAT.
diff --git a/_sources/guides/1_quantization.rst.txt b/_sources/guides/1_quantization.rst.txt
index a0ab8ec..2a9bf62 100644
--- a/_sources/guides/1_quantization.rst.txt
+++ b/_sources/guides/1_quantization.rst.txt
@@ -4,10 +4,10 @@ Quantization
 ModelOpt quantization toolkit supports quantization for NVIDIA's hardware and software stack.
 Currently ModelOpt supports quantization in PyTorch and ONNX frameworks.
 
-ModelOpt is based on simulated quantization in the original precision to simulate, test and optimize
-for the best trade-off between the accuracy of the model and different low-precision formats. To
-achieve actual speedups and memory savings, the model with simulated quantization can be exported to
-deployment frameworks, like TensorRT or TensorRT-LLM. Please refer to the
+ModelOpt is based on simulated quantization in the original precision to simulate, test, and
+optimize for the best trade-off between the accuracy of the model and different low-precision
+formats. To achieve actual speedups and memory savings, the model with simulated quantization can be
+exported to deployment frameworks, like TensorRT or TensorRT-LLM. Please refer to the
 `TensorRT-Model-Optimizer GitHub repository `_ for more details and examples.
diff --git a/_sources/guides/4_distillation.rst.txt b/_sources/guides/4_distillation.rst.txt
new file mode 100644
index 0000000..a5543a4
--- /dev/null
+++ b/_sources/guides/4_distillation.rst.txt
@@ -0,0 +1,193 @@
+============
+Distillation
+============
+
+Introduction
+============
+
+ModelOpt's Distillation API (:mod:`modelopt.torch.distill `) allows you to enable a
+knowledge-distillation training pipeline with minimal script modification.
+
+Follow the steps described below to obtain a model trained with direct knowledge transferred from
+a more powerful teacher model using :mod:`modelopt.torch.distill `:
+
+#. **Convert your model via** :meth:`mtd.convert `:
+   Wrap both a teacher and student model into a larger meta-model which abstracts away the
+   interaction between the two.
+#. **Distillation training**: Seamlessly use the meta-model in place of the original model and run
+   the original script with only one additional line of code for loss calculation.
+#. **Checkpoint and re-load**: Save the model via :meth:`mto.save ` and
+   restore via :meth:`mto.restore `
+
+*To find out more about Distillation and related concepts, please refer to the below section*
+:ref:`Distillation Concepts `.
+
+.. _distillation-conversion:
+
+Convert and integrate
+=====================
+
+You can convert your model into a :class:`DistillationModel `
+using :meth:`mtd.convert() `.
+
+
+Example usage:
+
+.. code-block:: python
+
+    import modelopt.torch.distill as mtd
+    from torchvision.models import resnet50
+
+    # User-defined model (student)
+    model = resnet50()
+
+    # Configure and convert for distillation
+    distillation_config = {
+        # `teacher_model` is a model class or callable, or a tuple.
+        # If a tuple, it must be of the form (model_cls_or_callable,) or
+        # (model_cls_or_callable, args) or (model_cls_or_callable, args, kwargs).
+        "teacher_model": teacher_model,
+        "criterion": mtd.LogitsDistillationLoss(),
+        "loss_balancer": mtd.StaticLossBalancer(),
+    }
+    distillation_model = mtd.convert(model, mode=[("kd_loss", distillation_config)])
+
+    # Export model in original class form
+    model_exported = mtd.export(distillation_model)
+
+.. note::
+    The config requires a (non-lambda) Callable to return a teacher model in place of the model
+    itself. This is to avoid re-saving the teacher state dict upon saving the Distillation
+    meta model. Thus, the same callable must be available in the namespace when restoring via
+    the :meth:`mto.restore ` utility.
+
+.. note::
+    As the model is not of the same class anymore, calling ``type()`` on the model after conversion
+    will not work as expected.
+    Though ``isinstance()`` will still work, as the model dynamically becomes a subclass of the
+    original class.
+
+---
+
+.. _distillation-concepts:
+
+Distillation Concepts
+=====================
+
+Below, we will provide an overview of ModelOpt's distillation feature as well as its basic
+concepts and terminology.
+
+Overview
+--------
+
+
+.. list-table:: Glossary
+    :widths: 55 90
+    :header-rows: 0
+
+    * - `Knowledge Distillation`_
+      - The transfer of learnable feature information from a teacher model to a student.
+    * - `Student`_
+      - The model to be trained (can either start from scratch or pre-trained).
+    * - `Teacher`_
+      - The fixed, pre-trained model used as the example the student will "learn" from.
+    * - `Distillation loss`_
+      - A loss function used between the features of a student and teacher to perform Knowledge
+        Distillation, separate from the student's original task loss.
+    * - `Loss Balancer`_
+      - An implementation for a utility which determines how to combine Distillation loss(es) and
+        original student task loss into a single scalar.
+    * - `Soft-label Distillation`_
+      - The specific process of performing Knowledge Distillation between the output logits of
+        teacher and student models.
+
+
+Concepts
+--------
+
+Knowledge Distillation
+^^^^^^^^^^^^^^^^^^^^^^
+
+Distillation can be a broader term used to define any sort of information compressed among models,
+but in this case we refer to basic teacher-student Knowledge Distillation. The process creates an
+auxiliary loss (or can replace the original one) between a model which is already trained (teacher)
+and a model which is not (student), in hopes of making the student learn information (i.e. feature
+maps or logits) which the teacher has already mastered. This can serve multiple purposes:
+
+    **A.** Model-size reduction: A smaller, efficient student model (potentially a pruned teacher) reaching
+    accuracies near or exceeding that of the larger, slower teacher model. (See the
+    `Lottery Ticket Hypothesis <1_>`_ for reasoning behind this, which also applies to pruning)
+
+    **B.** An alternative to pure training: Distilling a model from an existing one (and then
+    fine-tuning) can often be faster than training it from scratch.
+
+    **C.** Module replacement: One can replace a single module within a model with a more efficient one
+    and use distillation on its original outputs to effectively re-integrate it into the whole model.
+
+Student
+^^^^^^^
+
+This is the model we wish to train and use in the end. It ideally meets the desired architectural
+and computational requirements, but is either untrained or requires a boost in accuracy.
+
+Teacher
+^^^^^^^
+
+This is the model from which learned features/information are used to create a loss for the student.
+Usually it is larger and/or slower than desired, but possesses a satisfactory accuracy.
+
+Distillation loss
+^^^^^^^^^^^^^^^^^
+
+To actually "transfer" knowledge from a teacher to student, we need to add (or replace) an
+optimization objective to the student's original loss function(s). This can be as simple as enacting
+MSE on two same-sized activation tensors between the teacher and student, with the assumption that
+the features learned by the teacher are of high-quality and should be imitated as much as possible.
+
+ModelOpt supports specifying a different loss function per layer-output pair, and includes a few
+pre-defined functions for use, though users may often need to define their own.
+Module-pairs-to-loss-function mappings are specified via the ``criterion`` key of the configuration
+dictionary - student and teacher, respectively in order - and the loss function itself should accept
+outputs in the same order as well:
+
+.. code-block:: python
+
+    # Example using pairwise-mapped criterion.
+    # Will perform the loss on the output of ``student_model.classifier`` and ``teacher_model.layers.18``
+    distillation_config = {
+        "teacher_model": teacher_model,
+        "criterion": {("classifier", "layers.18"): mtd.LogitsDistillationLoss()},
+    }
+    distillation_model = mtd.convert(student_model, mode=[("kd_loss", distillation_config)])
+
+The intermediate outputs for the losses are captured by the
+:class:`DistillationModel ` and then the loss(es) are
+invoked using :meth:`DistillationModel.compute_kd_loss() `.
+If present, the original student's non-distillation loss is passed in as an argument.
+
+Writing a custom loss function is often necessary, especially to handle outputs that need to be
+processed to obtain the logits and activations (a minimal sketch is given at the end of this
+guide).
+
+Loss Balancer
+^^^^^^^^^^^^^
+
+As Distillation losses may be applied to several pairs of layers, the losses are returned in the
+form of a dictionary which should be reduced into a scalar value for backpropagation. A Loss
+Balancer (whose interface is defined by
+:class:`DistillationLossBalancer `) serves to fill
+this purpose.
+
+If Distillation loss is only applied to a single pair of layer outputs, and no student loss is available,
+a Loss Balancer should not be provided.
+
+ModelOpt provides a simple Balancer implementation, and the aforementioned interface can be used to
+create custom ones.
+
+Soft-label Distillation
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The scenario involving distillation only on the output logits of student/teacher classification
+models is known as Soft-label Distillation. In this case, one could even omit the student's original
+classification loss altogether if the teacher's outputs are purely preferred over whatever the
+ground truth labels may be.
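+
+As a concrete illustration of a custom criterion (a minimal sketch -- the class name and
+temperature handling are not part of ModelOpt), a soft-label loss can be written as a plain
+``nn.Module`` that takes the student and teacher outputs in order:
+
+.. code-block:: python
+
+    import torch
+    import torch.nn.functional as F
+    from torch import nn
+
+
+    class TemperatureScaledKDLoss(nn.Module):
+        """Hypothetical soft-label loss with temperature scaling."""
+
+        def __init__(self, temperature: float = 2.0):
+            super().__init__()
+            self.temperature = temperature
+
+        def forward(self, out_student: torch.Tensor, out_teacher: torch.Tensor) -> torch.Tensor:
+            # Soften both distributions, then match them with KL divergence.
+            log_p_student = F.log_softmax(out_student / self.temperature, dim=-1)
+            p_teacher = F.softmax(out_teacher / self.temperature, dim=-1)
+            return F.kl_div(log_p_student, p_teacher, reduction="batchmean") * self.temperature**2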
+
+.. _1: https://arxiv.org/abs/1803.03635
diff --git a/_sources/guides/5_sparsity.rst.txt b/_sources/guides/5_sparsity.rst.txt
index 94d14eb..9d1db31 100644
--- a/_sources/guides/5_sparsity.rst.txt
+++ b/_sources/guides/5_sparsity.rst.txt
@@ -7,7 +7,7 @@ Introduction
 
 ModelOpt's Sparsity module (:mod:`modelopt.torch.sparsity `) enables
 you to sparsify the weights of your model. This can be useful for reducing the memory footprint of
-your model, and can also be used to speed up inference.
+your model and can also be used to speed up inference.
 
 Follow the steps described below to obtain a model with sparse weights using ModelOpt's Sparsity
@@ -20,7 +20,7 @@ module :mod:`modelopt.torch.sparsity`:
 #. **Checkpoint and re-load**: Save the model via :meth:`mto.save ` and
    restore via :meth:`mto.restore `
 
-*To find out more about Sparsity and related concepts, please refer to the section below*
+*To find out more about Sparsity and related concepts, please refer to the section on*
 :ref:`Sparsity Concepts `.
 
 .. _sparsity-pts:
@@ -37,7 +37,7 @@ config and a sparsity format as input and returns a sparse model. The sparsity c
 dictionary specifying the layers to sparsify and the optional dataloader for calibration in
 data-driven sparsity, e.g., SparseGPT.
 
-:meth:`mts.sparsify` supports (`NVIDIA ASP <1_>`_) and `SparseGPT <2_>`_ methods for magnitude-based
+:meth:`mts.sparsify` supports `NVIDIA ASP <1_>`_ and `SparseGPT <2_>`_ methods for magnitude-based
 and data-driven sparsity, respectively.
 
 Example usage:
diff --git a/_sources/guides/_basic_quantization.rst.txt b/_sources/guides/_basic_quantization.rst.txt
index c35041d..cb46438 100644
--- a/_sources/guides/_basic_quantization.rst.txt
+++ b/_sources/guides/_basic_quantization.rst.txt
@@ -10,7 +10,7 @@ Precision format
 ****************
 
 The precision format defines the bit-width of the quantized values. Generally, there are integer
 formats (sign bit + mantissa bits) and floating-point formats (sign bit + exponent bits + mantissa
-bits). `FP8 FORMATS FOR DEEP LEARNING `_ provides a detailed
+bits). `Fp8 Formats for Deep Learning `_ provides a detailed
 explanation of the floating-point formats.
 
 Scaling factor
diff --git a/_sources/guides/_onnx_quantization.rst.txt b/_sources/guides/_onnx_quantization.rst.txt
index ecdc15f..5a426c7 100644
--- a/_sources/guides/_onnx_quantization.rst.txt
+++ b/_sources/guides/_onnx_quantization.rst.txt
@@ -8,7 +8,7 @@ ModelOpt provides ONNX quantization that works together with `TensorRT Explicit
 #. White-box design allowing expert users to customize the quantization process.
 #. Better support for vision transformers.
 
-Currently ONNX quantization only supports INT8 quantization.
+Currently ONNX quantization supports INT4 and INT8 quantization.
 
 .. note::
 
@@ -18,7 +18,7 @@ Currently ONNX quantization only supports INT8 quantization.
 Requirements
 ============
 
-#. TensorRT >= 8.6 ( >= 9.1 preferred). Please refer to `TensorRT 9.1 download link `_.
+#. TensorRT >= 8.6 ( >= 10.0 preferred). Please refer to `TensorRT 10.0 download link `_.
 
 
 
@@ -29,11 +29,7 @@ PTQ should be done with a calibration dataset. If calibration dataset is not pro
 Prepare calibration dataset
 ---------------------------
 
-ModelOpt supports two types of calibration data format: image directory or numpy file.
-
-Image directory only works for single-input ONNX models.
-
-Numpy file works for both single-input and multi-input ONNX models. In the case of multi-input ONNX models, the numpy file should be a dictionary with keys as input names and values as numpy arrays.
+ModelOpt supports an npz/npy file as the calibration data format; the numpy file should be a dictionary with model input names as keys and numpy arrays as values.
+ModelOpt supports npz/npy file as calibration data format and that numpy file should be a dictionary with keys as model input names and values as numpy arrays. .. code-block:: python diff --git a/_sources/guides/_pytorch_quantization.rst.txt b/_sources/guides/_pytorch_quantization.rst.txt index 0ba87ca..28d6262 100644 --- a/_sources/guides/_pytorch_quantization.rst.txt +++ b/_sources/guides/_pytorch_quantization.rst.txt @@ -2,8 +2,6 @@ PyTorch Quantization ==================== -ModelOpt PyTorch quantization is refactored based on `pytorch_quantization `_. - Key advantages offered by ModelOpt's PyTorch quantization: #. Support advanced quantization formats, e.g., Block-wise Int4 and FP8. @@ -69,7 +67,7 @@ To verify that the quantizer nodes are placed correctly in the model, let's prin # Print quantization summary after successfully quantizing the model with mtq.quantize # This will show the quantizers inserted in the model and their configurations - mtq.print_quantization_summary(model) + mtq.print_quant_summary(model) After PTQ, the model can be exported to ONNX with the normal PyTorch ONNX export flow. @@ -167,24 +165,31 @@ Under the hood, ModelOpt :meth:`mtq.quantize() ` (quantizer modules) into the model layers like linear layer, conv layer etc. and patches their forward method to perform quantization. -To create :class:`TensorQuantizer` instance, you need to specify :class:`QuantDescriptor `, which -describes the quantization parameters like quantization bits, axis etc. +The quantization parameters are as described in :class:`QuantizerAttributeConfig `. +They can be set at initialization by passing :class:`QuantizerAttributeConfig ` +or later by calling :meth:`TensorQuantizer.set_from_attribute_config() `. +If the quantization parameters are not set explicitly, the quantizer will use the default values. Here is an example of creating a quantizer module: .. code-block:: python - from modelopt.torch.quantization.tensor_quant import QuantDescriptor + from modelopt.torch.quantization.config import QuantizerAttributeConfig from modelopt.torch.quantization.nn import TensorQuantizer - # Create quantizer descriptor - quant_desc = QuantDescriptor(num_bits=8, axis=(-1,), unsigned=True) - - # Create quantizer module - quantizer = TensorQuantizer(quant_desc) + # Create quantizer module with default quantization parameters + quantizer = TensorQuantizer() quant_x = quantizer(x) # Quantize input x + # Create quantizer module with custom quantization parameters + # Example setting for INT4 block-wise quantization + quantizer_custom = TensorQuantizer(QuantizerAttributeConfig(num_bits=4, block_sizes={-1: 128})) + + # Quantize input with custom quantization parameters + quant_x = quantizer_custom(x) # Quantize input x + + .. _customize_quantizer_config: Customize quantizer config @@ -276,3 +281,12 @@ Weight folding avoids repeated quantization of weights during each inferece forw .. note:: After weight folding, the model can no longer be exported to ONNX or fine-tuned with QAT. + +Migrate from pytorch_quantization +================================= + +ModelOpt PyTorch quantization is refactored from and extends upon +`pytorch_quantization `_. + +Previous users of ``pytorch_quantization`` can simply migrate to ``modelopt.torch.quantization`` by +replacing the import statements. 
diff --git a/_sources/reference/0_versions.rst.txt b/_sources/reference/0_versions.rst.txt index 58dc03f..18daecf 100644 --- a/_sources/reference/0_versions.rst.txt +++ b/_sources/reference/0_versions.rst.txt @@ -1 +1,5 @@ +========= +Changelog +========= + .. include:: ../../../CHANGELOG.rst diff --git a/_sources/reference/generated/modelopt.deploy.llm.rst.txt b/_sources/reference/generated/modelopt.deploy.llm.rst.txt index 7a09bbe..ea34ed7 100644 --- a/_sources/reference/generated/modelopt.deploy.llm.rst.txt +++ b/_sources/reference/generated/modelopt.deploy.llm.rst.txt @@ -17,9 +17,6 @@ llm modelopt.deploy.llm.generate - modelopt.deploy.llm.model_config_trt - - modelopt.deploy.llm.nemo_utils diff --git a/_sources/reference/generated/modelopt.onnx.op_types.rst.txt b/_sources/reference/generated/modelopt.onnx.op_types.rst.txt index 9325558..861cec3 100644 --- a/_sources/reference/generated/modelopt.onnx.op_types.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.op_types.rst.txt @@ -39,7 +39,6 @@ op\_types .. autosummary:: :nosignatures: - get_quantizable_op_types is_binary_op is_control_flow_op is_conversion_op diff --git a/_sources/reference/generated/modelopt.onnx.quantization.calib_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.calib_utils.rst.txt index 8c56bf3..de103a8 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.calib_utils.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.calib_utils.rst.txt @@ -42,4 +42,11 @@ calib\_utils .. Overview table of available functions in the module + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + import_scales_from_calib_cache + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.extensions.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.extensions.rst.txt new file mode 100644 index 0000000..cba39d7 --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.extensions.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +extensions +========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.extensions + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. 
Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.deploy.llm.model_config_trt.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.fp8.rst.txt similarity index 87% rename from _sources/reference/generated/modelopt.deploy.llm.model_config_trt.rst.txt rename to _sources/reference/generated/modelopt.onnx.quantization.fp8.rst.txt index 856ecca..b002df1 100644 --- a/_sources/reference/generated/modelopt.deploy.llm.model_config_trt.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.fp8.rst.txt @@ -1,7 +1,7 @@ .. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst -model\_config\_trt -================== +fp8 +=== .. List the submodules @@ -17,7 +17,7 @@ model\_config\_trt __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, add in ``YYY.__module__ = __name__``. -.. automodule:: modelopt.deploy.llm.model_config_trt +.. automodule:: modelopt.onnx.quantization.fp8 :members: :undoc-members: @@ -39,7 +39,6 @@ model\_config\_trt .. autosummary:: :nosignatures: - build_tensorrt_llm - build_tensorrt_llm_rank + quantize \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.graph_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.graph_utils.rst.txt index 68090db..7da4a25 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.graph_utils.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.graph_utils.rst.txt @@ -39,12 +39,18 @@ graph\_utils .. autosummary:: :nosignatures: + add_fp16_fp32_cast build_non_residual_input_map classify_partition_nodes filter_quantizable_kgen_heads + find_fp8_mha_partitions + find_mha_partitions + find_nodes_to_exclude get_fusible_backbone has_const_input has_path_type + insert_fp8_mha_casts + insert_matmul_casts is_const_input print_stat remove_partial_input_qdq diff --git a/_sources/reference/generated/modelopt.onnx.quantization.int4.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.int4.rst.txt index d02940e..5bc72b5 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.int4.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.int4.rst.txt @@ -49,9 +49,9 @@ int4 dq_tensor find_scales quant_tensor - quantize_int4 - quantize_int4_awq_clip - quantize_int4_rtn + quantize + quantize_awq_clip + quantize_rtn rtn \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.int8.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.int8.rst.txt new file mode 100644 index 0000000..41ad99e --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.int8.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +int8 +==== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. 
automodule:: modelopt.onnx.quantization.int8 + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + quantize + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.ort_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.ort_utils.rst.txt index 9ac6f96..39d1b86 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.ort_utils.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.ort_utils.rst.txt @@ -39,6 +39,8 @@ ort\_utils .. autosummary:: :nosignatures: + configure_ort create_inference_session + get_quantizable_op_types \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.qdq_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.qdq_utils.rst.txt index 4609774..36e220b 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.qdq_utils.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.qdq_utils.rst.txt @@ -48,6 +48,7 @@ qdq\_utils make_gs_quantized_weight make_gs_scale make_gs_zp + replace_scale_values use_trt_qdq_ops \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.quant_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.quant_utils.rst.txt index caf23db..72e2912 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.quant_utils.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.quant_utils.rst.txt @@ -39,6 +39,7 @@ quant\_utils .. autosummary:: :nosignatures: + pack_float32_to_4bit_cpp_based pack_float32_to_4bit_optimized \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.quantize.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.quantize.rst.txt index 6bf0871..bcaeb1e 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.quantize.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.quantize.rst.txt @@ -1,44 +1,6 @@ -.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst +modelopt.onnx.quantization.quantize +=================================== -quantize -======== +.. currentmodule:: modelopt.onnx.quantization -.. List the submodules - - - - - -.. Autodoc anything defined in the module itself - - TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED - We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported - For reimports that should be documented somewhere other than where they are defined, the re-imports - __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, - add in ``YYY.__module__ = __name__``. - -.. automodule:: modelopt.onnx.quantization.quantize - :members: - :undoc-members: - - .. Also show members without docstrings. Only members from __all__ are considered as per conf.py - .. Ideally we should add docstrings for these members. - - - .. Overview table of available classes in the module - - - - - - .. Overview table of available functions in the module - - - .. rubric:: Functions - - .. 
autosummary:: - :nosignatures: - - quantize - - \ No newline at end of file +.. autofunction:: quantize \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.rst.txt index 3af2c1a..00aa2fe 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.rst.txt @@ -17,6 +17,12 @@ quantization modelopt.onnx.quantization.calib_utils + modelopt.onnx.quantization.extensions + + + modelopt.onnx.quantization.fp8 + + modelopt.onnx.quantization.graph_utils @@ -26,6 +32,9 @@ quantization modelopt.onnx.quantization.int4 + modelopt.onnx.quantization.int8 + + modelopt.onnx.quantization.operators diff --git a/_sources/reference/generated/modelopt.onnx.utils.rst.txt b/_sources/reference/generated/modelopt.onnx.utils.rst.txt index cbb0252..a853c6a 100644 --- a/_sources/reference/generated/modelopt.onnx.utils.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.utils.rst.txt @@ -39,7 +39,7 @@ utils .. autosummary:: :nosignatures: - duplicate_shared_linear_weights + duplicate_shared_constants find_lowest_common_ancestor gen_random_inputs get_all_input_names @@ -64,6 +64,7 @@ utils remove_weights_data save_onnx save_onnx_bytes_to_dir + udpate_domain validate_batch_size validate_onnx diff --git a/_sources/reference/generated/modelopt.torch.distill.config.rst.txt b/_sources/reference/generated/modelopt.torch.distill.config.rst.txt new file mode 100644 index 0000000..20278a4 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.config.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +config +====== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill.config + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.distillation.rst.txt b/_sources/reference/generated/modelopt.torch.distill.distillation.rst.txt new file mode 100644 index 0000000..5a7d6cf --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.distillation.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +distillation +============ + +.. List the submodules + + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill.distillation + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + convert + export + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.distillation_model.rst.txt b/_sources/reference/generated/modelopt.torch.distill.distillation_model.rst.txt new file mode 100644 index 0000000..8456a96 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.distillation_model.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +distillation\_model +=================== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill.distillation_model + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + DistillationModel + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.loss_balancers.rst.txt b/_sources/reference/generated/modelopt.torch.distill.loss_balancers.rst.txt new file mode 100644 index 0000000..55859fb --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.loss_balancers.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +loss\_balancers +=============== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. 
automodule:: modelopt.torch.distill.loss_balancers + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + DistillationLossBalancer + StaticLossBalancer + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.losses.rst.txt b/_sources/reference/generated/modelopt.torch.distill.losses.rst.txt new file mode 100644 index 0000000..1875289 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.losses.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +losses +====== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill.losses + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + LogitsDistillationLoss + MGDLoss + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.mode.rst.txt b/_sources/reference/generated/modelopt.torch.distill.mode.rst.txt new file mode 100644 index 0000000..06a7dff --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.mode.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +mode +==== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill.mode + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + ExportStudentModeDescriptor + KnowledgeDistillationModeDescriptor + + + + + .. 
Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.registry.rst.txt b/_sources/reference/generated/modelopt.torch.distill.registry.rst.txt new file mode 100644 index 0000000..765d2e7 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.registry.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +registry +======== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill.registry + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.rst.txt b/_sources/reference/generated/modelopt.torch.distill.rst.txt new file mode 100644 index 0000000..55b95ce --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.rst.txt @@ -0,0 +1,65 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +distill +======= + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.distill.config + + + modelopt.torch.distill.distillation + + + modelopt.torch.distill.distillation_model + + + modelopt.torch.distill.loss_balancers + + + modelopt.torch.distill.losses + + + modelopt.torch.distill.mode + + + modelopt.torch.distill.registry + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.distribute.rst.txt b/_sources/reference/generated/modelopt.torch.export.distribute.rst.txt index 970af8c..e929ca8 100644 --- a/_sources/reference/generated/modelopt.torch.export.distribute.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.distribute.rst.txt @@ -46,11 +46,7 @@ distribute .. 
autosummary:: :nosignatures: - barrier get_configs_parallel - get_group - get_rank get_tensors_parallel - get_world_size \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.hf_config_map.rst.txt b/_sources/reference/generated/modelopt.torch.export.hf_config_map.rst.txt new file mode 100644 index 0000000..292c226 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.hf_config_map.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +hf\_config\_map +=============== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.hf_config_map + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.layer_utils.rst.txt b/_sources/reference/generated/modelopt.torch.export.layer_utils.rst.txt index 9747b72..14b7ea9 100644 --- a/_sources/reference/generated/modelopt.torch.export.layer_utils.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.layer_utils.rst.txt @@ -40,19 +40,23 @@ layer\_utils :nosignatures: build_attention_config + build_conv_config build_decoder_config build_embedding_config build_layernorm_config build_linear_config + build_medusa_heads_config build_mlp_config build_moe_config build_qkv + build_recurrent_config build_stacked_experts check_model_compatibility get_activation_scaling_factor get_kv_cache_dtype get_kv_cache_scaling_factor get_prequant_scaling_factor + get_quantization_format get_scaling_factor get_transformer_layers get_weight_block_size @@ -65,5 +69,7 @@ layer\_utils is_linear is_mlp is_moe + is_quantlinear + is_recurrent \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.model_config.rst.txt b/_sources/reference/generated/modelopt.torch.export.model_config.rst.txt index d5dd2f8..aeb2ae3 100644 --- a/_sources/reference/generated/modelopt.torch.export.model_config.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.model_config.rst.txt @@ -34,15 +34,20 @@ model\_config :nosignatures: AttentionConfig + ConvConfig DecoderLayerConfig EmbeddingConfig ExpertConfig LayernormConfig + LinearActConfig LinearConfig MLPConfig MOEConfig + MedusaHeadConfig ModelConfig QKVConfig + RecurrentConfig + RgLruConfig diff --git a/_sources/reference/generated/modelopt.torch.export.model_config_export.rst.txt b/_sources/reference/generated/modelopt.torch.export.model_config_export.rst.txt index 8cdc857..7caee45 100644 --- a/_sources/reference/generated/modelopt.torch.export.model_config_export.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.model_config_export.rst.txt @@ -39,6 +39,7 @@ model\_config\_export .. 
autosummary:: :nosignatures: + export_hf_checkpoint export_tensorrt_llm_checkpoint torch_to_tensorrt_llm_checkpoint diff --git a/_sources/reference/generated/modelopt.torch.export.rst.txt b/_sources/reference/generated/modelopt.torch.export.rst.txt index 0a5dccb..409c8f8 100644 --- a/_sources/reference/generated/modelopt.torch.export.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.rst.txt @@ -17,6 +17,9 @@ export modelopt.torch.export.distribute + modelopt.torch.export.hf_config_map + + modelopt.torch.export.layer_utils @@ -41,6 +44,9 @@ export modelopt.torch.export.transformer_engine + modelopt.torch.export.vllm + + .. Autodoc anything defined in the module itself diff --git a/_sources/reference/generated/modelopt.torch.export.scaling_factor_utils.rst.txt b/_sources/reference/generated/modelopt.torch.export.scaling_factor_utils.rst.txt index c8b0cbe..4d2e9ee 100644 --- a/_sources/reference/generated/modelopt.torch.export.scaling_factor_utils.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.scaling_factor_utils.rst.txt @@ -39,6 +39,7 @@ scaling\_factor\_utils .. autosummary:: :nosignatures: + adjust_attn_amax_values get_weights_scaling_factor resmooth_and_get_scale diff --git a/_sources/reference/generated/modelopt.torch.export.tensorrt_llm_utils.rst.txt b/_sources/reference/generated/modelopt.torch.export.tensorrt_llm_utils.rst.txt index e7b943c..b68f1fe 100644 --- a/_sources/reference/generated/modelopt.torch.export.tensorrt_llm_utils.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.tensorrt_llm_utils.rst.txt @@ -41,6 +41,8 @@ tensorrt\_llm\_utils convert_to_tensorrt_llm_config is_tensorrt_llm_0_8_or_9 + prepare_enc_dec_decoder_layer + prepare_enc_dec_export_dir weights_to_npz \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.vllm.rst.txt b/_sources/reference/generated/modelopt.torch.export.vllm.rst.txt new file mode 100644 index 0000000..824a66e --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.vllm.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +vllm +==== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.vllm + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + export_to_vllm + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.opt.utils.rst.txt b/_sources/reference/generated/modelopt.torch.opt.utils.rst.txt index d815dcc..557a53d 100644 --- a/_sources/reference/generated/modelopt.torch.opt.utils.rst.txt +++ b/_sources/reference/generated/modelopt.torch.opt.utils.rst.txt @@ -39,6 +39,7 @@ utils .. 
autosummary:: :nosignatures: + get_hparam is_configurable is_dynamic named_hparams diff --git a/_sources/reference/generated/modelopt.torch.quantization.algorithms.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.algorithms.rst.txt new file mode 100644 index 0000000..8b5faf5 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.algorithms.rst.txt @@ -0,0 +1,46 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +algorithms +========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.algorithms + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + AutoQuantizeSearcher + QuantRecipe + QuantRecipeHparam + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.extensions.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.extensions.rst.txt index bab84af..3a8f7bc 100644 --- a/_sources/reference/generated/modelopt.torch.quantization.extensions.rst.txt +++ b/_sources/reference/generated/modelopt.torch.quantization.extensions.rst.txt @@ -34,4 +34,12 @@ extensions .. Overview table of available functions in the module + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + get_cuda_ext + get_cuda_ext_fp8 + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.model_quant.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.model_quant.rst.txt index 6fcdfab..c4a3c87 100644 --- a/_sources/reference/generated/modelopt.torch.quantization.model_quant.rst.txt +++ b/_sources/reference/generated/modelopt.torch.quantization.model_quant.rst.txt @@ -40,6 +40,7 @@ model\_quant :nosignatures: quantize + auto_quantize disable_quantizer enable_quantizer print_quant_summary diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_rnn.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_rnn.rst.txt new file mode 100644 index 0000000..f793965 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_rnn.rst.txt @@ -0,0 +1,58 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quant\_rnn +========== + +.. List the submodules + + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.modules.quant_rnn + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + QuantRNNBase + QuantRNNFullBase + RNNLayerForward + VFRNNForward + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + get_quantized_rnn_layer_forward + get_quantized_rnn_layer_variable_len_forward + get_quantized_rnn_layer_variable_len_reverse_forward + lstm_cell_with_proj + quantized_cell_forward + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.rst.txt index 2e2017a..653feb0 100644 --- a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.rst.txt +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.rst.txt @@ -38,6 +38,9 @@ modules modelopt.torch.quantization.nn.modules.quant_pooling + modelopt.torch.quantization.nn.modules.quant_rnn + + modelopt.torch.quantization.nn.modules.tensor_quantizer diff --git a/_sources/reference/generated/modelopt.torch.quantization.plugins.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.plugins.rst.txt index f6dad09..3abad44 100644 --- a/_sources/reference/generated/modelopt.torch.quantization.plugins.rst.txt +++ b/_sources/reference/generated/modelopt.torch.quantization.plugins.rst.txt @@ -22,6 +22,7 @@ plugins + .. Autodoc anything defined in the module itself TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED diff --git a/_sources/reference/generated/modelopt.torch.quantization.qtensor.base_qtensor.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.qtensor.base_qtensor.rst.txt new file mode 100644 index 0000000..b9f001e --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.qtensor.base_qtensor.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +base\_qtensor +============= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.qtensor.base_qtensor + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. 
Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + BaseQuantizedTensor + QTensorWrapper + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.qtensor.int4_tensor.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.qtensor.int4_tensor.rst.txt new file mode 100644 index 0000000..48e1aee --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.qtensor.int4_tensor.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +int4\_tensor +============ + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.qtensor.int4_tensor + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + INT4QTensor + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.qtensor.nf4_tensor.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.qtensor.nf4_tensor.rst.txt new file mode 100644 index 0000000..8262774 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.qtensor.nf4_tensor.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +nf4\_tensor +=========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.qtensor.nf4_tensor + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + NF4QTensor + + + + + .. 
Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.qtensor.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.qtensor.rst.txt new file mode 100644 index 0000000..9cb69a0 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.qtensor.rst.txt @@ -0,0 +1,53 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +qtensor +======= + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.quantization.qtensor.base_qtensor + + + modelopt.torch.quantization.qtensor.int4_tensor + + + modelopt.torch.quantization.qtensor.nf4_tensor + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.qtensor + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.rst.txt index f1dbf86..32b4657 100644 --- a/_sources/reference/generated/modelopt.torch.quantization.rst.txt +++ b/_sources/reference/generated/modelopt.torch.quantization.rst.txt @@ -14,6 +14,9 @@ quantization :recursive: + modelopt.torch.quantization.algorithms + + modelopt.torch.quantization.calib @@ -44,6 +47,9 @@ quantization modelopt.torch.quantization.plugins + modelopt.torch.quantization.qtensor + + modelopt.torch.quantization.quant_modules diff --git a/_sources/reference/generated/modelopt.torch.quantization.tensor_quant.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.tensor_quant.rst.txt index 71f1b1f..2602dcb 100644 --- a/_sources/reference/generated/modelopt.torch.quantization.tensor_quant.rst.txt +++ b/_sources/reference/generated/modelopt.torch.quantization.tensor_quant.rst.txt @@ -36,9 +36,7 @@ tensor\_quant FakeAffineTensorQuantFunction FakeTensorQuantFunction LegacyFakeTensorQuantFunction - QuantDescriptor ScaledE4M3Function - ScaledQuantDescriptor TensorQuantFunction diff --git a/_sources/reference/generated/modelopt.torch.rst.txt b/_sources/reference/generated/modelopt.torch.rst.txt index a00a1df..036a86d 100644 --- a/_sources/reference/generated/modelopt.torch.rst.txt +++ b/_sources/reference/generated/modelopt.torch.rst.txt @@ -14,6 +14,9 @@ torch :recursive: + modelopt.torch.distill + + modelopt.torch.export diff --git a/_sources/reference/generated/modelopt.torch.utils.distributed.rst.txt b/_sources/reference/generated/modelopt.torch.utils.distributed.rst.txt index ffca0fb..1757440 100644 --- a/_sources/reference/generated/modelopt.torch.utils.distributed.rst.txt +++ b/_sources/reference/generated/modelopt.torch.utils.distributed.rst.txt @@ -40,13 +40,15 
@@ distributed :nosignatures: backend - size - rank - is_master barrier - set_data_parallel_group - set_tensor_parallel_group get_data_parallel_group get_tensor_parallel_group + is_available + is_initialized + is_master + rank + set_data_parallel_group + set_tensor_parallel_group + size \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.network.rst.txt b/_sources/reference/generated/modelopt.torch.utils.network.rst.txt index 45d6122..9fa96d7 100644 --- a/_sources/reference/generated/modelopt.torch.utils.network.rst.txt +++ b/_sources/reference/generated/modelopt.torch.utils.network.rst.txt @@ -51,6 +51,7 @@ network param_num param_num_from_forward remove_bn + run_forward_loop set_submodule standardize_model_args standardize_model_like_tuple @@ -58,6 +59,6 @@ network standardize_constructor_args unwrap_model zero_grad - run_forward_loop + create_param_grad_clear_hook \ No newline at end of file diff --git a/_sources/support/1_contact.rst.txt b/_sources/support/1_contact.rst.txt index 54e47ae..ae60244 100644 --- a/_sources/support/1_contact.rst.txt +++ b/_sources/support/1_contact.rst.txt @@ -3,5 +3,4 @@ Contact us ========== -You may raise an issue on `GitHub `_ -for any questions or issues you may have. +Contact us by submitting issues on `GitHub `_. diff --git a/_sources/support/2_faqs.rst.txt b/_sources/support/2_faqs.rst.txt index 59da8dd..bf9b9bc 100644 --- a/_sources/support/2_faqs.rst.txt +++ b/_sources/support/2_faqs.rst.txt @@ -3,6 +3,9 @@ FAQs ==== +Known Issues +============ + 1. Potential memory leak for ``FSDP`` with ``use_orig_params=True`` ------------------------------------------------------------------- diff --git a/deployment/1_tensorrt_llm_deployment.html b/deployment/1_tensorrt_llm_deployment.html index 43529e5..149ae47 100644 --- a/deployment/1_tensorrt_llm_deployment.html +++ b/deployment/1_tensorrt_llm_deployment.html @@ -4,7 +4,7 @@ - TensorRT-LLM Deployment — Model Optimizer 0.11.2 + TensorRT-LLM Deployment — Model Optimizer 0.15.0 [The remaining hunks of this HTML page diff carried only stripped markup (stylesheet and script asset links updated for the 0.15.0 build); no recoverable text.]